diff options
author | Chris Dyer <redpony@gmail.com> | 2014-03-12 02:30:32 -0400 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-03-12 02:30:32 -0400 |
commit | bcff95cd2879fa20a0bfd00e64a2555f6eab1c2b (patch) | |
tree | 3f833b5e0efc819a5b923353a9045485a98c4910 /python/cdec/sa/rulefactory.pxi | |
parent | 10a668822715cee024a7e7391c62caa8e078e840 (diff) | |
parent | efbc43b40c8c3204245814b65a7be280498281bd (diff) |
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'python/cdec/sa/rulefactory.pxi')
-rw-r--r-- | python/cdec/sa/rulefactory.pxi | 14 |
1 files changed, 13 insertions, 1 deletions
```diff
diff --git a/python/cdec/sa/rulefactory.pxi b/python/cdec/sa/rulefactory.pxi
index ca3321a4..635cca10 100644
--- a/python/cdec/sa/rulefactory.pxi
+++ b/python/cdec/sa/rulefactory.pxi
@@ -291,10 +291,13 @@ cdef class HieroCachingRuleFactory:
 
     cdef bint online
     cdef online_stats
+    cdef bilex
 
     def __cinit__(self,
         # compiled alignment object (REQUIRED)
         Alignment alignment,
+        # bilexical dictionary if online
+        bilex=None,
         # parameter for double-binary search; doesn't seem to matter much
         float by_slack_factor=1.0,
         # name of generic nonterminal used by Hiero
@@ -400,7 +403,10 @@ cdef class HieroCachingRuleFactory:
         self.findexes1 = IntList(initial_len=10)
 
         # Online stats
-
+
+        # None if not online
+        self.bilex = bilex
+
         # True after data is added
         self.online = False
         self.online_stats = defaultdict(OnlineStats)
@@ -2039,6 +2045,12 @@ cdef class HieroCachingRuleFactory:
             stats.phrases_fe[f_ph][e_ph] += 1
             if not stats.phrases_al[f_ph][e_ph]:
                 stats.phrases_al[f_ph][e_ph] = al
+
+        # Update bilexical dictionary (if exists)
+        if self.bilex:
+            self.bilex.update(f_words, e_words, alignment)
+        else:
+            logger.warning('No online bilexical dictionary specified, not updating lexical weights')
 
     # Create a rule from source, target, non-terminals, and alignments
     def form_rule(self, f_i, e_i, f_span, e_span, nt, al):
```