summaryrefslogtreecommitdiff
path: root/python/cdec/sa/features.py
diff options
context:
space:
mode:
authormjdenkowski <michael.j.denkowski@gmail.com>2014-04-10 16:42:50 -0400
committermjdenkowski <michael.j.denkowski@gmail.com>2014-04-10 16:42:50 -0400
commit83b450a6cd7c472783d4dcdb0239de9f297cd5b5 (patch)
tree2141c3e19cf1e0af475616fe912bf90d0f4b6f23 /python/cdec/sa/features.py
parent24f0fdb8a646f7aff501c757abe16ca8585444f8 (diff)
New feature: working implementation (online bilex)
Diffstat (limited to 'python/cdec/sa/features.py')
-rw-r--r--python/cdec/sa/features.py25
1 files changed, 23 insertions, 2 deletions
diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py
index 1779f2f9..e19a18c0 100644
--- a/python/cdec/sa/features.py
+++ b/python/cdec/sa/features.py
@@ -40,13 +40,31 @@ def CoherenceProb(ctx): # c(f) / sample c(f)
prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count)
return -math.log10(prob)
+# Not a feature, used for MaxLex
+# bilex get_score for multiple instances
def get_lex_online(f, e, dir, bilex_list):
    """Return a lexical score computed from counts pooled over several tables.

    The pair count and the marginal count are each summed over every table in
    ``bilex_list`` first; the score is the ratio of the two pooled sums.

    Args:
        f: source-side word, looked up in each table's ``f`` counts.
        e: target-side word, looked up in each table's ``e`` counts.
        dir: 0 divides by the pooled count of ``f``; any other value divides
            by the pooled count of ``e``.  The name shadows the ``dir``
            builtin but is kept so existing callers are unaffected.
        bilex_list: iterable of objects exposing ``f``, ``e`` and ``fe``
            attributes that support ``.get(key, default)``; ``fe`` is keyed
            by ``(f, e)`` tuples.

    Returns:
        The pooled ratio as a float, or 0.0 when the pooled pair count or
        the pooled marginal count is zero.
    """
    num = 0
    denom = 0
    for bilex in bilex_list:
        denom += bilex.f.get(f, 0) if dir == 0 else bilex.e.get(e, 0)
        num += bilex.fe.get((f, e), 0)
    if not num or not denom:
        # 0.0 rather than None: the result is fed to max() and compared with
        # "> 0", and None is not orderable against floats on Python 3.
        return 0.0
    # Force true division so integer counts are not truncated on Python 2.
    return float(num) / denom
+
def MaxLexEgivenF(ttable):
    """Build the MaxLexEgivenF feature function bound to ``ttable``.

    Args:
        ttable: lexical table exposing ``get_score(f, e, 0)``.  When the
            context carries online data it is also passed to
            ``get_lex_online`` together with ``ctx.online.bilex``.

    Returns:
        A function of one argument ``ctx`` that returns the feature value.
    """
    # The inner function intentionally reuses the outer name.
    # NOTE(review): presumably consumers read its __name__ -- confirm.
    def MaxLexEgivenF(ctx):
        """Sum, over target words, -log10 of the best source-word score.

        Each target word in ``ctx.ephrase.words`` is scored against every
        source word in ``ctx.fphrase.words`` plus the token 'NULL'; a target
        word with no positive score contributes MAXSCORE instead.
        """
        # Work on a copy so 'NULL' is never appended to the list handed
        # back by ctx.fphrase.words.
        fwords = list(ctx.fphrase.words)
        fwords.append('NULL')
        total = 0.0
        for e in ctx.ephrase.words:
            if ctx.online:
                bilexes = (ttable, ctx.online.bilex)
                scores = [get_lex_online(f, e, 0, bilexes) for f in fwords]
                # Drop "no estimate" results before max(): None cannot be
                # ordered against floats on Python 3.
                scores = [s for s in scores if s is not None]
                best = max(scores) if scores else 0.0
            else:
                best = max(ttable.get_score(f, e, 0) for f in fwords)
            total += -math.log10(best) if best > 0 else MAXSCORE
        return total
    return MaxLexEgivenF
@@ -57,7 +75,10 @@ def MaxLexFgivenE(ttable):
ewords.append('NULL')
maxOffScore = 0.0
for f in ctx.fphrase.words:
- maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+ if ctx.online:
+ maxScore = max(get_lex_online(f, e, 1, (ttable, ctx.online.bilex)) for e in ewords)
+ else:
+ maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
return maxOffScore
return MaxLexFgivenE