summary | refs | log | tree | commit | diff
path: root/python/pkg/cdec/sa/features.py
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-08-01 17:32:37 +0200
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-08-01 17:32:37 +0200
commit 3f8e33cfe481a09c121a410e66a6074b5d05683e (patch)
tree a41ecaf0bbb69fa91a581623abe89d41219c04f8 /python/pkg/cdec/sa/features.py
parent c139ce495861bb341e1b86a85ad4559f9ad53c14 (diff)
parent 9fe0219562e5db25171cce8776381600ff9a5649 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'python/pkg/cdec/sa/features.py')
-rw-r--r-- python/pkg/cdec/sa/features.py 57
1 files changed, 57 insertions, 0 deletions
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py
new file mode 100644
index 00000000..325b9e13
--- /dev/null
+++ b/python/pkg/cdec/sa/features.py
@@ -0,0 +1,57 @@
+from __future__ import division
+import math
+
# Cost returned in place of -log10(p) when p is not positive, since the
# logarithm is undefined there.
MAXSCORE = 99


def EgivenF(fphrase, ephrase, paircount, fcount, fsample_count):  # p(e|f)
    """Return -log10 of the relative frequency paircount / fcount.

    fphrase, ephrase and fsample_count are not read; they are accepted so
    that this function has the same signature as the other features.

    A ratio that is not positive yields MAXSCORE (as EgivenFCoherent does)
    rather than letting math.log10 raise ValueError on zero.

    Raises ZeroDivisionError when fcount is 0.
    """
    prob = paircount / fcount
    return -math.log10(prob) if prob > 0 else MAXSCORE
+
def CountEF(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return log10(1 + paircount).

    Only paircount is read; the remaining parameters exist so that all
    feature functions share one signature.
    """
    smoothed_count = 1 + paircount
    return math.log10(smoothed_count)
+
def SampleCountF(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return log10(1 + fsample_count).

    Only fsample_count is read; the remaining parameters exist so that all
    feature functions share one signature.
    """
    smoothed_count = 1 + fsample_count
    return math.log10(smoothed_count)
+
def EgivenFCoherent(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return -log10(paircount / fsample_count), or MAXSCORE if that ratio
    is not positive.

    fphrase, ephrase and fcount are not read. Raises ZeroDivisionError when
    fsample_count is 0.
    """
    ratio = paircount / fsample_count
    if ratio > 0:
        return -math.log10(ratio)
    # The logarithm is undefined for a non-positive ratio.
    return MAXSCORE
+
def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return -log10(fcount / fsample_count).

    fphrase, ephrase and paircount are not read. Raises ZeroDivisionError
    when fsample_count is 0 and ValueError when the ratio is not positive.
    """
    ratio = fcount / fsample_count
    return -math.log10(ratio)
+
def MaxLexEgivenF(ttable):
    """Build a feature that scores each ephrase word by its best fphrase word.

    ttable must provide get_score(f, e, 0); the constant third argument is
    presumably a direction selector -- confirm against the table class.

    The returned feature sums, over the words of ephrase, -log10 of the
    highest score that any word of fphrase (or the extra 'NULL' token)
    obtains for that word. A word whose best score is not positive
    contributes MAXSCORE instead.
    """
    def feature(fphrase, ephrase, paircount, fcount, fsample_count):
        # Copy before appending: appending to fphrase.words itself would add
        # one more 'NULL' to the caller's list on every call whenever that
        # attribute is a stored list rather than a freshly computed one.
        fwords = list(fphrase.words)
        fwords.append('NULL')

        def score():
            for e in ephrase.words:
                maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
                yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
        return sum(score())
    return feature
+
def MaxLexFgivenE(ttable):
    """Build a feature that scores each fphrase word by its best ephrase word.

    ttable must provide get_score(f, e, 1); the constant third argument is
    presumably a direction selector -- confirm against the table class.

    The returned feature sums, over the words of fphrase, -log10 of the
    highest score that any word of ephrase (or the extra 'NULL' token)
    obtains for that word. A word whose best score is not positive
    contributes MAXSCORE instead.
    """
    def feature(fphrase, ephrase, paircount, fcount, fsample_count):
        # Copy before appending: appending to ephrase.words itself would add
        # one more 'NULL' to the caller's list on every call whenever that
        # attribute is a stored list rather than a freshly computed one.
        ewords = list(ephrase.words)
        ewords.append('NULL')

        def score():
            for f in fphrase.words:
                maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
                yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
        return sum(score())
    return feature
+
def IsSingletonF(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return whether fcount equals 1; the other parameters are not read."""
    seen_once = fcount == 1
    return seen_once
+
def IsSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return whether paircount equals 1; the other parameters are not read."""
    seen_once = paircount == 1
    return seen_once
+
def IsNotSingletonF(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return whether fcount exceeds 1; the other parameters are not read."""
    seen_repeatedly = fcount > 1
    return seen_repeatedly
+
def IsNotSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return whether paircount exceeds 1; the other parameters are not read."""
    seen_repeatedly = paircount > 1
    return seen_repeatedly
+
def IsFEGreaterThanZero(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return whether paircount exceeds 0.01; the other parameters are not read.

    NOTE(review): the 0.01 threshold, rather than 0, suggests paircount may
    be fractional -- confirm against the caller.
    """
    above_threshold = paircount > 0.01
    return above_threshold