More online bilex updates

author: Michael Denkowski <mdenkows@cs.cmu.edu> 2014-03-07 01:52:09 -0800
committer: Michael Denkowski <mdenkows@cs.cmu.edu> 2014-03-07 01:52:09 -0800
commit: abcd6865a25944a1cd07c9224db2fd7a729f02e6 (patch)
tree: 5fd9e5298bd55f91a497c5bd46c35a3414aa5c5c /python/cdec/sa/features.py
parent: a49f3a5b19547e7e46a652b22fab601da8fc210f (diff)
1 files changed, 10 insertions, 53 deletions
diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py
index 3e6af859..1779f2f9 100644
--- a/python/cdec/sa/features.py
+++ b/python/cdec/sa/features.py
@@ -44,60 +44,22 @@ def MaxLexEgivenF(ttable):
     def MaxLexEgivenF(ctx):
         fwords = ctx.fphrase.words
         fwords.append('NULL')
-        # Always use this for now
-        if not ctx.online or ctx.online:
-            maxOffScore = 0.0
-            for e in ctx.ephrase.words:
-                maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return maxOffScore
-        else:
-            # For now, straight average
-            maxOffScore = 0.0
-            maxOnScore = 0.0
-            for e in ctx.ephrase.words:
-                maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            for e in ctx.ephrase:
-                if not isvar(e):
-                    maxScore = 0.0
-                    for f in ctx.fphrase:
-                        if not isvar(f):
-                            b_f = ctx.online.bilex_f.get(f, 0)
-                            if b_f:
-                                maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e))
-                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return (maxOffScore + maxOnScore) / 2
+        maxOffScore = 0.0
+        for e in ctx.ephrase.words:
+            maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+            maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+        return maxOffScore
     return MaxLexEgivenF
 
 def MaxLexFgivenE(ttable):
     def MaxLexFgivenE(ctx):
         ewords = ctx.ephrase.words
         ewords.append('NULL')
-        # Always use this for now
-        if not ctx.online or ctx.online:
-            maxOffScore = 0.0
-            for f in ctx.fphrase.words:
-                maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return maxOffScore
-        else:
-            # For now, straight average
-            maxOffScore = 0.0
-            maxOnScore = 0.0
-            for f in ctx.fphrase.words:
-                maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            for f in ctx.fphrase:
-                if not isvar(f):
-                    maxScore = 0.0
-                    for e in ctx.ephrase:
-                        if not isvar(e):
-                            b_e = ctx.online.bilex_e.get(e, 0)
-                            if b_e:
-                                maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e, 0) / b_e )
-                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return (maxOffScore + maxOnScore) / 2
+        maxOffScore = 0.0
+        for f in ctx.fphrase.words:
+            maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+            maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+        return maxOffScore
     return MaxLexFgivenE
 
 def IsSingletonF(ctx):
@@ -140,8 +102,3 @@ def IsSupportedOnline(ctx): # Occurs in online data?
         return (ctx.online.paircount > 0.01)
     else:
         return False
-
-def CountExceptLM(vocab):
-    def CountExceptLM(ctx): # Word count in bitext (inc online data) but NOT mono text
-        return sum(1 for e in ctx.ephrase.words if e not in vocab)
-    return CountExceptLM
author	Michael Denkowski <mdenkows@cs.cmu.edu>	2014-03-07 01:52:09 -0800
committer	Michael Denkowski <mdenkows@cs.cmu.edu>	2014-03-07 01:52:09 -0800
commit	abcd6865a25944a1cd07c9224db2fd7a729f02e6 (patch)
tree	5fd9e5298bd55f91a497c5bd46c35a3414aa5c5c /python/cdec/sa/features.py
parent	a49f3a5b19547e7e46a652b22fab601da8fc210f (diff)