From abcd6865a25944a1cd07c9224db2fd7a729f02e6 Mon Sep 17 00:00:00 2001 From: Michael Denkowski Date: Fri, 7 Mar 2014 01:52:09 -0800 Subject: More online bilex updates --- python/cdec/sa/rulefactory.pxi | 42 +----------------------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) (limited to 'python/cdec/sa/rulefactory.pxi') diff --git a/python/cdec/sa/rulefactory.pxi b/python/cdec/sa/rulefactory.pxi index 86b994b9..ca3321a4 100644 --- a/python/cdec/sa/rulefactory.pxi +++ b/python/cdec/sa/rulefactory.pxi @@ -31,10 +31,6 @@ OnlineFeatureContext = namedtuple('OnlineFeatureContext', ['fcount', 'fsample_count', 'paircount', - 'bilex_f', - 'bilex_e', - 'bilex_fe', - 'bilex_ef' ]) cdef class OnlineStats: @@ -43,10 +39,6 @@ cdef class OnlineStats: cdef public phrases_e cdef public phrases_fe cdef public phrases_al - cdef public bilex_f - cdef public bilex_e - cdef public bilex_fe - cdef public bilex_ef def __cinit__(self): # Keep track of everything that can be sampled: @@ -58,12 +50,6 @@ cdef class OnlineStats: self.phrases_fe = defaultdict(lambda: defaultdict(int)) self.phrases_al = defaultdict(lambda: defaultdict(tuple)) - # Bilexical counts - self.bilex_f = defaultdict(int) - self.bilex_e = defaultdict(int) - self.bilex_fe = defaultdict(lambda: defaultdict(int)) - self.bilex_ef = defaultdict(lambda: defaultdict(int)) - cdef int PRECOMPUTE = 0 cdef int MERGE = 1 cdef int BAEZA_YATES = 2 @@ -2054,32 +2040,6 @@ cdef class HieroCachingRuleFactory: if not stats.phrases_al[f_ph][e_ph]: stats.phrases_al[f_ph][e_ph] = al - # Update Bilexical counts - aligned_fe = [list() for _ in range(len(f_words))] - aligned_ef = [list() for _ in range(len(e_words))] - null_word = sym_fromstring('NULL', True) - for (i, j) in alignment: - aligned_fe[i].append(j) - aligned_ef[j].append(i) - for f_i in range(len(f_words)): - stats.bilex_f[f_words[f_i]] += 1 - e_i_aligned = aligned_fe[f_i] - lc = len(e_i_aligned) - if lc > 0: - for e_i in e_i_aligned: - stats.bilex_fe[f_words[f_i]][e_words[e_i]] += (1.0) / lc - else: - stats.bilex_fe[f_words[f_i]][null_word] += 1 - for e_i in range(len(e_words)): - stats.bilex_e[e_words[e_i]] += 1 - f_i_aligned = aligned_ef[e_i] - lc = len(f_i_aligned) - if lc > 0: - for f_i in f_i_aligned: - stats.bilex_ef[e_words[e_i]][f_words[f_i]] += (1.0) / lc - else: - stats.bilex_ef[e_words[e_i]][null_word] += 1 - # Create a rule from source, target, non-terminals, and alignments def form_rule(self, f_i, e_i, f_span, e_span, nt, al): @@ -2159,7 +2119,7 @@ cdef class HieroCachingRuleFactory: fsample_count = stats.samples_f.get(f, 0) d = stats.phrases_fe.get(f, None) paircount = d.get(e, 0) if d else 0 - return OnlineFeatureContext(fcount, fsample_count, paircount, stats.bilex_f, stats.bilex_e, stats.bilex_fe) + return OnlineFeatureContext(fcount, fsample_count, paircount) return None # Find all phrases that we might try to extract -- cgit v1.2.3