diff options
author | mjdenkowski <michael.j.denkowski@gmail.com> | 2014-03-11 15:46:30 -0400 |
---|---|---|
committer | mjdenkowski <michael.j.denkowski@gmail.com> | 2014-03-11 15:46:30 -0400 |
commit | ba0a7d0cd688ee136c7c7b7776e68cb9603585b2 (patch) | |
tree | 51b37e4aa4f39e9a6a6941111d57f10ad0738816 /python/cdec/sa/rulefactory.pxi | |
parent | fcd6fe0123e5a12c926e344ed93e17f021674edc (diff) |
Update lexical weights in online grammar extraction
Diffstat (limited to 'python/cdec/sa/rulefactory.pxi')
-rw-r--r-- | python/cdec/sa/rulefactory.pxi | 14 |
1 file changed, 13 insertions, 1 deletion
```diff
diff --git a/python/cdec/sa/rulefactory.pxi b/python/cdec/sa/rulefactory.pxi
index ca3321a4..635cca10 100644
--- a/python/cdec/sa/rulefactory.pxi
+++ b/python/cdec/sa/rulefactory.pxi
@@ -291,10 +291,13 @@ cdef class HieroCachingRuleFactory:
     cdef bint online
     cdef online_stats
 
+    cdef bilex
 
     def __cinit__(self,
             # compiled alignment object (REQUIRED)
             Alignment alignment,
+            # bilexical dictionary if online
+            bilex=None,
             # parameter for double-binary search; doesn't seem to matter much
             float by_slack_factor=1.0,
             # name of generic nonterminal used by Hiero
@@ -400,7 +403,10 @@ cdef class HieroCachingRuleFactory:
         self.findexes1 = IntList(initial_len=10)
 
         # Online stats
-
+
+        # None if not online
+        self.bilex = bilex
+
         # True after data is added
         self.online = False
         self.online_stats = defaultdict(OnlineStats)
@@ -2039,6 +2045,12 @@ cdef class HieroCachingRuleFactory:
             stats.phrases_fe[f_ph][e_ph] += 1
             if not stats.phrases_al[f_ph][e_ph]:
                 stats.phrases_al[f_ph][e_ph] = al
+
+        # Update bilexical dictionary (if exists)
+        if self.bilex:
+            self.bilex.update(f_words, e_words, alignment)
+        else:
+            logger.warning('No online bilexical dictionary specified, not updating lexical weights')
 
     # Create a rule from source, target, non-terminals, and alignments
     def form_rule(self, f_i, e_i, f_span, e_span, nt, al):
```