fixed conflicts

author: Avneesh Saluja <asaluja@gmail.com> 2013-03-28 18:28:16 -0700
committer: Avneesh Saluja <asaluja@gmail.com> 2013-03-28 18:28:16 -0700
commit: 3d8d656fa7911524e0e6885647173474524e0784 (patch)
tree: 81b1ee2fcb67980376d03f0aa48e42e53abff222 /python/pkg/cdec/sa/features.py
parent: be7f57fdd484e063775d7abf083b9fa4c403b610 (diff)
parent: 96fedabebafe7a38a6d5928be8fff767e411d705 (diff)
1 files changed, 101 insertions, 16 deletions
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py
index a4ae23e8..46412cd5 100644
--- a/python/pkg/cdec/sa/features.py
+++ b/python/pkg/cdec/sa/features.py
@@ -1,57 +1,142 @@
 from __future__ import division
 import math
 
+from cdec.sa import isvar
+
 MAXSCORE = 99
 
 def EgivenF(ctx): # p(e|f) = c(e, f)/c(f)
-    return -math.log10(ctx.paircount/ctx.fcount)
+    if not ctx.online: # ctx.online is falsy: use the base counts only
+        prob = ctx.paircount/ctx.fcount
+    else: # pool base and online counts in numerator and denominator
+        prob = (ctx.paircount + ctx.online.paircount) / (ctx.fcount + ctx.online.fcount)
+    return -math.log10(prob) # negative log10 of the relative frequency
 
 def CountEF(ctx): # c(e, f)
-    return math.log10(1 + ctx.paircount)
+    if not ctx.online: # ctx.online is falsy: use the base pair count only
+        count = 1 + ctx.paircount
+    else: # add the online pair count to the base pair count
+        count = 1 + ctx.paircount + ctx.online.paircount
+    return math.log10(count) # log10 of the add-one pair count
 
 def SampleCountF(ctx): # sample c(f)
-    return math.log10(1 + ctx.fsample_count)
+    if not ctx.online: # ctx.online is falsy: use the base sample count only
+        count = 1 + ctx.fsample_count
+    else: # add the online sample count to the base sample count
+        count = 1 + ctx.fsample_count + ctx.online.fsample_count
+    return math.log10(count) # log10 of the add-one sample count
 
 def EgivenFCoherent(ctx): # c(e, f) / sample c(f)
-    prob = ctx.paircount/ctx.fsample_count
+    if not ctx.online: # ctx.online is falsy: use the base counts only
+        prob = ctx.paircount/ctx.fsample_count
+    else: # pool base and online counts in numerator and denominator
+        prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fsample_count)
     return -math.log10(prob) if prob > 0 else MAXSCORE
 
 def CoherenceProb(ctx): # c(f) / sample c(f)
-    return -math.log10(ctx.fcount/ctx.fsample_count)
+    if not ctx.online: # ctx.online is falsy: use the base counts only
+        prob = ctx.fcount/ctx.fsample_count
+    else: # pool base and online counts in numerator and denominator
+        prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count)
+    return -math.log10(prob) # negative log10 of the ratio; no zero guard here
 
 def MaxLexEgivenF(ttable):
     def MaxLexEgivenF(ctx):
         fwords = ctx.fphrase.words
         fwords.append('NULL')
-        def score():
+        # Always use this for now: the condition below is always true, so the else branch never runs
+        if not ctx.online or ctx.online:
+            maxOffScore = 0.0 # running sum of per-target-word costs from ttable
+            for e in ctx.ephrase.words:
+                maxScore = max(ttable.get_score(f, e, 0) for f in fwords) # best score over source words plus 'NULL'
+                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE # MAXSCORE when no positive score exists
+            return maxOffScore
+        else:
+            # For now, straight average of the ttable-based and online-based sums
+            maxOffScore = 0.0
+            maxOnScore = 0.0
             for e in ctx.ephrase.words:
-              maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
-              yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-        return sum(score())
+                maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+            for e in ctx.ephrase: # iterate phrase symbols, skipping variables via isvar
+                if not isvar(e):
+                    maxScore = 0.0
+                    for f in ctx.fphrase:
+                        if not isvar(f):
+                            b_f = ctx.online.bilex_f.get(f, 0) # online count for f; 0 if unseen
+                            if b_f: # skip unseen f, which also avoids dividing by zero
+                                maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e, 0) / b_f) # c(f, e)/c(f), mirroring MaxLexFgivenE; missing pairs count as 0
+                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+            return (maxOffScore + maxOnScore) / 2
     return MaxLexEgivenF
 
 def MaxLexFgivenE(ttable):
     def MaxLexFgivenE(ctx):
         ewords = ctx.ephrase.words
         ewords.append('NULL')
-        def score():
+        # Always use this for now: the condition below is always true, so the else branch never runs
+        if not ctx.online or ctx.online:
+            maxOffScore = 0.0 # running sum of per-source-word costs from ttable
             for f in ctx.fphrase.words:
-              maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
-              yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-        return sum(score())
+                maxScore = max(ttable.get_score(f, e, 1) for e in ewords) # best score over target words plus 'NULL'
+                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE # MAXSCORE when no positive score exists
+            return maxOffScore
+        else:
+            # For now, straight average of the ttable-based and online-based sums
+            maxOffScore = 0.0
+            maxOnScore = 0.0
+            for f in ctx.fphrase.words:
+                maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+            for f in ctx.fphrase: # iterate phrase symbols, skipping variables via isvar
+                if not isvar(f):
+                    maxScore = 0.0
+                    for e in ctx.ephrase:
+                        if not isvar(e):
+                            b_e = ctx.online.bilex_e.get(e, 0) # online count for e; 0 if unseen
+                            if b_e: # skip unseen e, which also avoids dividing by zero
+                                maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e, 0) / b_e ) # c(f, e)/c(e); missing pairs count as 0
+                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+            return (maxOffScore + maxOnScore) / 2
     return MaxLexFgivenE
 
 def IsSingletonF(ctx):
-    return (ctx.fcount == 1)
+    if not ctx.online: # ctx.online is falsy: use the base source count only
+        count = ctx.fcount
+    else: # add the online source count to the base count
+        count = ctx.fcount + ctx.online.fcount  # combined count
+    return (count == 1) # true when the combined count is exactly one
 
 def IsSingletonFE(ctx):
-    return (ctx.paircount == 1)
+    if not ctx.online: # ctx.online is falsy: use the base pair count only
+        count = ctx.paircount
+    else: # add the online pair count to the base count
+        count = ctx.paircount + ctx.online.paircount
+    return (count == 1) # true when the combined pair count is exactly one
 
 def IsNotSingletonF(ctx):
-    return (ctx.fcount > 1)
+    if not ctx.online: # ctx.online is falsy: use the base source count only
+        count = ctx.fcount
+    else: # add the online source count to the base count
+        count = ctx.fcount + ctx.online.fcount
+    return (count > 1) # true when the combined count exceeds one
 
 def IsNotSingletonFE(ctx):
+    if not ctx.online: # ctx.online is falsy: use the base pair count only
+        count = ctx.paircount
+    else: # add the online pair count to the base count
+        count = ctx.paircount + ctx.online.paircount
-    return (ctx.paircount > 1)
+    return (count > 1) # test the combined count, matching IsSingletonFE and IsNotSingletonF
 
 def IsFEGreaterThanZero(ctx):
+    if not ctx.online: # ctx.online is falsy: use the base pair count only
+        count = ctx.paircount
+    else: # add the online pair count to the base count
+        count = ctx.paircount + ctx.online.paircount
-    return (ctx.paircount > 0.01)
+    return (count > 0.01) # test the combined count computed above, not the base count alone
+
+def IsSupportedOnline(ctx): # Occurs in online data?
+    if ctx.online: # online stats present: test the online pair count only
+        return (ctx.online.paircount > 0.01)
+    else: # ctx.online is falsy: the pair cannot be supported online
+        return False
+\ No newline at end of file
author	Avneesh Saluja <asaluja@gmail.com>	2013-03-28 18:28:16 -0700
committer	Avneesh Saluja <asaluja@gmail.com>	2013-03-28 18:28:16 -0700
commit	3d8d656fa7911524e0e6885647173474524e0784 (patch)
tree	81b1ee2fcb67980376d03f0aa48e42e53abff222 /python/pkg/cdec/sa/features.py
parent	be7f57fdd484e063775d7abf083b9fa4c403b610 (diff)
parent	96fedabebafe7a38a6d5928be8fff767e411d705 (diff)