From 54a1c0e2bde259e3acc9c0a8ec8da3c7704e80ca Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Tue, 19 Feb 2013 21:23:48 +0000 Subject: Timing every part of the extractor. --- python/src/sa/rulefactory.pxi | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'python/src/sa/rulefactory.pxi') diff --git a/python/src/sa/rulefactory.pxi b/python/src/sa/rulefactory.pxi index 5006a838..2d996581 100644 --- a/python/src/sa/rulefactory.pxi +++ b/python/src/sa/rulefactory.pxi @@ -250,6 +250,7 @@ cdef class HieroCachingRuleFactory: cdef prev_norm_prefix cdef float extract_time + cdef float intersect_time cdef SuffixArray fsa cdef DataArray fda cdef DataArray eda @@ -940,10 +941,11 @@ cdef class HieroCachingRuleFactory: cdef IntList sample, chunklen cdef Matching matching cdef Phrase hiero_phrase - + flen = len(fwords) start_time = monitor_cpu() self.extract_time = 0.0 + self.intersect_time = 0.0 nodes_isteps_away_buffer = {} hit = 0 reachable_buffer = {} @@ -1028,7 +1030,10 @@ cdef class HieroCachingRuleFactory: else: if arity > 0: # Intersecting because of arity > 0 + intersect_start_time = monitor_cpu() phrase_location = self.intersect(node, node.suffix_link.children[word_id], hiero_phrase) + intersect_stop_time = monitor_cpu() + self.intersect_time += intersect_stop_time - intersect_start_time else: # Suffix array search phrase_location = node.phrase_location @@ -1132,6 +1137,7 @@ cdef class HieroCachingRuleFactory: logger.info("Total time for rule lookup, extraction, and scoring = %f seconds", (stop_time - start_time)) gc.collect() logger.info(" Extract time = %f seconds", self.extract_time) + logger.info(" Intersect time = %f seconds", self.intersect_time) cdef int find_fixpoint(self, @@ -1603,7 +1609,6 @@ cdef class HieroCachingRuleFactory: for (phrase2,eindexes) in phrase_list: als1 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) extracts.append((fphr, phrase2, pair_count, tuple(als1))) - if (num_gaps < self.max_nonterminals and phrase_len < self.max_length and f_back_high - f_back_low + self.train_min_gap_size <= self.train_max_initial_size): @@ -1738,12 +1743,12 @@ cdef class HieroCachingRuleFactory: met_constraints = 0 if (met_constraints and - self.find_fixpoint(f_x_low, f_x_high, + (self.find_fixpoint(f_x_low, f_x_high, f_links_low, f_links_high, e_links_low, e_links_high, e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high, f_sent_len, e_sent_len, self.train_max_initial_size, self.train_max_initial_size, - 1, 1, 2, 1, 1, 1, 1) and + 1, 1, 2, 1, 1, 1, 1) == 1) and ((not self.tight_phrases) or (f_links_low[f_x_low] != -1 and f_links_low[f_x_high-1] != -1)) and self.find_fixpoint(f_x_low, f_low, f_links_low, f_links_high, e_links_low, e_links_high, -- cgit v1.2.3