diff options
author | Michael Denkowski <mdenkows@cs.cmu.edu> | 2014-03-06 15:35:10 -0800 |
---|---|---|
committer | Michael Denkowski <mdenkows@cs.cmu.edu> | 2014-03-06 15:35:10 -0800 |
commit | a49f3a5b19547e7e46a652b22fab601da8fc210f (patch) | |
tree | e601ada49b2751344d14175b005182ca842a29cf /python/cdec/sa/rulefactory.pxi | |
parent | 18aa808143ab06da361a557350f6b3dd964717ce (diff) |
Compile count-based bilex table for online grammar extraction.
Diffstat (limited to 'python/cdec/sa/rulefactory.pxi')
-rw-r--r-- | python/cdec/sa/rulefactory.pxi | 9 |
1 files changed, 7 insertions, 2 deletions
```diff
diff --git a/python/cdec/sa/rulefactory.pxi b/python/cdec/sa/rulefactory.pxi
index 044a78c8..86b994b9 100644
--- a/python/cdec/sa/rulefactory.pxi
+++ b/python/cdec/sa/rulefactory.pxi
@@ -2057,23 +2057,28 @@ cdef class HieroCachingRuleFactory:
         # Update Bilexical counts
         aligned_fe = [list() for _ in range(len(f_words))]
         aligned_ef = [list() for _ in range(len(e_words))]
+        null_word = sym_fromstring('NULL', True)
         for (i, j) in alignment:
             aligned_fe[i].append(j)
             aligned_ef[j].append(i)
         for f_i in range(len(f_words)):
+            stats.bilex_f[f_words[f_i]] += 1
             e_i_aligned = aligned_fe[f_i]
             lc = len(e_i_aligned)
             if lc > 0:
-                stats.bilex_f[f_words[f_i]] += 1
                 for e_i in e_i_aligned:
                     stats.bilex_fe[f_words[f_i]][e_words[e_i]] += (1.0) / lc
+            else:
+                stats.bilex_fe[f_words[f_i]][null_word] += 1
         for e_i in range(len(e_words)):
+            stats.bilex_e[e_words[e_i]] += 1
             f_i_aligned = aligned_ef[e_i]
             lc = len(f_i_aligned)
             if lc > 0:
-                stats.bilex_e[e_words[e_i]] += 1
                 for f_i in f_i_aligned:
                     stats.bilex_ef[e_words[e_i]][f_words[f_i]] += (1.0) / lc
+            else:
+                stats.bilex_ef[e_words[e_i]][null_word] += 1
 
     # Create a rule from source, target, non-terminals, and alignments
     def form_rule(self, f_i, e_i, f_span, e_span, nt, al):
```