summaryrefslogtreecommitdiff
path: root/python/pkg/cdec/sa/features.py
diff options
context:
space:
mode:
authorMichael Denkowski <michael.j.denkowski@gmail.com>2013-01-28 14:21:22 -0500
committerMichael Denkowski <michael.j.denkowski@gmail.com>2013-01-28 14:21:22 -0500
commit143afb1e6210ae6105426d47d7f225cbfa695753 (patch)
tree241db8508ebf72c2767751cbc92257a680bfff3c /python/pkg/cdec/sa/features.py
parent5e7a99f9ce09a31092e194c06dd51368e18b3aed (diff)
Bilexical scores for online rules
Diffstat (limited to 'python/pkg/cdec/sa/features.py')
-rw-r--r--python/pkg/cdec/sa/features.py44
1 files changed, 36 insertions, 8 deletions
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py
index 49064f73..a89499d4 100644
--- a/python/pkg/cdec/sa/features.py
+++ b/python/pkg/cdec/sa/features.py
@@ -1,6 +1,8 @@
from __future__ import division
import math
+from cdec.sa import isvar
+
MAXSCORE = 99
def EgivenF(ctx): # p(e|f) = c(e, f)/c(f)
@@ -42,22 +44,48 @@ def MaxLexEgivenF(ttable):
def MaxLexEgivenF(ctx):
fwords = ctx.fphrase.words
fwords.append('NULL')
- def score():
+ if not ctx.online:
+ maxOffScore = 0.0
+ for e in ctx.ephrase.words:
+ maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+ maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ return maxOffScore
+ else:
+ # For now, straight average
+ maxOffScore = 0.0
+ maxOnScore = 0.0
for e in ctx.ephrase.words:
- maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
- yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
- return sum(score())
+ maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+ maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ for e in ctx.ephrase:
+ if not isvar(e):
+ maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_f[f]) for f in ctx.fphrase if not isvar(f))
+ maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ return (maxOffScore + maxOnScore) / 2
return MaxLexEgivenF
def MaxLexFgivenE(ttable):
def MaxLexFgivenE(ctx):
ewords = ctx.ephrase.words
ewords.append('NULL')
- def score():
+ if not ctx.online:
+ maxOffScore = 0.0
+ for f in ctx.fphrase.words:
+ maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+ maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ return maxOffScore
+ else:
+ # For now, straight average
+ maxOffScore = 0.0
+ maxOnScore = 0.0
for f in ctx.fphrase.words:
- maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
- yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
- return sum(score())
+ maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+ maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ for f in ctx.fphrase:
+ if not isvar(f):
+ maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_e[e]) for e in ctx.ephrase if not isvar(e))
+ maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ return (maxOffScore + maxOnScore) / 2
return MaxLexFgivenE
def IsSingletonF(ctx):