Merge remote-tracking branch 'upstream/master'

author: Patrick Simianer <p@simianer.de> 2014-03-16 17:48:48 +0100
committer: Patrick Simianer <p@simianer.de> 2014-03-16 17:48:48 +0100
commit: 5250fd67a4b8f242068cff87f0a6a4211f8b0fcf (patch)
tree: f1401c1fd3eeae8671e59baf0d2169d1eb721cb7 /python/cdec/sa/features.py
parent: 3eedf96b5a08b3e3414888d328c505814b84d8db (diff)
parent: cc87bfed0697583b7c11243913254dde3c0047d4 (diff)
1 file changed, 10 insertions, 59 deletions
diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py
index dcc60401..1779f2f9 100644
--- a/python/cdec/sa/features.py
+++ b/python/cdec/sa/features.py
@@ -44,60 +44,22 @@ def MaxLexEgivenF(ttable):
     def MaxLexEgivenF(ctx):
         fwords = ctx.fphrase.words
         fwords.append('NULL')
-        # Always use this for now
-        if not ctx.online or ctx.online:
-            maxOffScore = 0.0
-            for e in ctx.ephrase.words:
-                maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return maxOffScore
-        else:
-            # For now, straight average
-            maxOffScore = 0.0
-            maxOnScore = 0.0
-            for e in ctx.ephrase.words:
-                maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            for e in ctx.ephrase:
-                if not isvar(e):
-                    maxScore = 0.0
-                    for f in ctx.fphrase:
-                        if not isvar(f):
-                            b_f = ctx.online.bilex_f.get(f, 0)
-                            if b_f:
-                                maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e))
-                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return (maxOffScore + maxOnScore) / 2
+        maxOffScore = 0.0
+        for e in ctx.ephrase.words:
+            maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+            maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+        return maxOffScore
     return MaxLexEgivenF
 
 def MaxLexFgivenE(ttable):
     def MaxLexFgivenE(ctx):
         ewords = ctx.ephrase.words
         ewords.append('NULL')
-        # Always use this for now
-        if not ctx.online or ctx.online:
-            maxOffScore = 0.0
-            for f in ctx.fphrase.words:
-                maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return maxOffScore
-        else:
-            # For now, straight average
-            maxOffScore = 0.0
-            maxOnScore = 0.0
-            for f in ctx.fphrase.words:
-                maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            for f in ctx.fphrase:
-                if not isvar(f):
-                    maxScore = 0.0
-                    for e in ctx.ephrase:
-                        if not isvar(e):
-                            b_e = ctx.online.bilex_e.get(e, 0)
-                            if b_e:
-                                maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e, 0) / b_e )
-                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return (maxOffScore + maxOnScore) / 2
+        maxOffScore = 0.0
+        for f in ctx.fphrase.words:
+            maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+            maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+        return maxOffScore
     return MaxLexFgivenE
 
 def IsSingletonF(ctx):
@@ -140,14 +102,3 @@ def IsSupportedOnline(ctx): # Occurs in online data?
         return (ctx.online.paircount > 0.01)
     else:
         return False
-
-def CountExceptLM(vocab):
-    def CountExceptLM(ctx): # Word count in bitext (inc online data) but NOT mono text
-        return sum(1 for e in ctx.ephrase.words if e not in vocab)
-    return CountExceptLM
-
-def CountExceptLex(ttable):
-    def CountExceptLex(ctx): # Word count in online data but NOT aligned in original bitext
-        # TODO: Check that online data actually contains aligned word when rulefactory TODO is addressed.
-        return sum(1 for e in ctx.ephrase.words if not ttable.contains_e_word(e))
-    return CountExceptLex
author	Patrick Simianer <p@simianer.de>	2014-03-16 17:48:48 +0100
committer	Patrick Simianer <p@simianer.de>	2014-03-16 17:48:48 +0100
commit	5250fd67a4b8f242068cff87f0a6a4211f8b0fcf (patch)
tree	f1401c1fd3eeae8671e59baf0d2169d1eb721cb7 /python/cdec/sa/features.py
parent	3eedf96b5a08b3e3414888d328c505814b84d8db (diff)
parent	cc87bfed0697583b7c11243913254dde3c0047d4 (diff)