diff options
author | Michael Denkowski <michael.j.denkowski@gmail.com> | 2013-01-28 14:21:22 -0500 |
---|---|---|
committer | Michael Denkowski <michael.j.denkowski@gmail.com> | 2013-01-28 14:21:22 -0500 |
commit | 143afb1e6210ae6105426d47d7f225cbfa695753 (patch) | |
tree | 241db8508ebf72c2767751cbc92257a680bfff3c /python/pkg/cdec/sa/features.py | |
parent | 5e7a99f9ce09a31092e194c06dd51368e18b3aed (diff) |
Bilexical scores for online rules
Diffstat (limited to 'python/pkg/cdec/sa/features.py')
-rw-r--r-- | python/pkg/cdec/sa/features.py | 44 |
1 files changed, 36 insertions, 8 deletions
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py index 49064f73..a89499d4 100644 --- a/python/pkg/cdec/sa/features.py +++ b/python/pkg/cdec/sa/features.py @@ -1,6 +1,8 @@ from __future__ import division import math +from cdec.sa import isvar + MAXSCORE = 99 def EgivenF(ctx): # p(e|f) = c(e, f)/c(f) @@ -42,22 +44,48 @@ def MaxLexEgivenF(ttable): def MaxLexEgivenF(ctx): fwords = ctx.fphrase.words fwords.append('NULL') - def score(): + if not ctx.online: + maxOffScore = 0.0 + for e in ctx.ephrase.words: + maxScore = max(ttable.get_score(f, e, 0) for f in fwords) + maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return maxOffScore + else: + # For now, straight average + maxOffScore = 0.0 + maxOnScore = 0.0 for e in ctx.ephrase.words: - maxScore = max(ttable.get_score(f, e, 0) for f in fwords) - yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return sum(score()) + maxScore = max(ttable.get_score(f, e, 0) for f in fwords) + maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE + for e in ctx.ephrase: + if not isvar(e): + maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_f[f]) for f in ctx.fphrase if not isvar(f)) + maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return (maxOffScore + maxOnScore) / 2 return MaxLexEgivenF def MaxLexFgivenE(ttable): def MaxLexFgivenE(ctx): ewords = ctx.ephrase.words ewords.append('NULL') - def score(): + if not ctx.online: + maxOffScore = 0.0 + for f in ctx.fphrase.words: + maxScore = max(ttable.get_score(f, e, 1) for e in ewords) + maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return maxOffScore + else: + # For now, straight average + maxOffScore = 0.0 + maxOnScore = 0.0 for f in ctx.fphrase.words: - maxScore = max(ttable.get_score(f, e, 1) for e in ewords) - yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return sum(score()) + maxScore = max(ttable.get_score(f, e, 1) for e in ewords) + maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE + for f in ctx.fphrase: + if not isvar(f): + maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_e[e]) for e in ctx.ephrase if not isvar(e)) + maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return (maxOffScore + maxOnScore) / 2 return MaxLexFgivenE def IsSingletonF(ctx): |