summary | refs | log | tree | commit | diff
path: root/python
diff options
context:
space:
mode:
author: mjdenkowski <michael.j.denkowski@gmail.com>, 2014-04-10 16:58:46 -0400
committer: mjdenkowski <michael.j.denkowski@gmail.com>, 2014-04-10 16:58:46 -0400
commit: 659ea32efb9ad0c1d8ad0d1dc4ead67be9859e6b (patch)
tree: 09c1a368701d55c87eaf4b846368ec315da33282 /python
parent: 1014d39fa347ec51dd2e588bae16b8692e188382 (diff)
Refactoring
Diffstat (limited to 'python')
-rw-r--r-- python/cdec/sa/_sa.cpp 2
-rw-r--r-- python/cdec/sa/features.py 21
-rw-r--r-- python/cdec/sa/online.py 14
3 files changed, 19 insertions, 18 deletions
diff --git a/python/cdec/sa/_sa.cpp b/python/cdec/sa/_sa.cpp
index 652261fe..bbea8c9c 100644
--- a/python/cdec/sa/_sa.cpp
+++ b/python/cdec/sa/_sa.cpp
@@ -1,4 +1,4 @@
-/* Generated by Cython 0.20.1 on Thu Apr 10 16:38:02 2014 */
+/* Generated by Cython 0.20.1 on Thu Apr 10 16:55:21 2014 */
#define PY_SSIZE_T_CLEAN
#ifndef CYTHON_USE_PYLONG_INTERNALS
diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py
index e19a18c0..92e23889 100644
--- a/python/cdec/sa/features.py
+++ b/python/cdec/sa/features.py
@@ -3,6 +3,8 @@ import math
from cdec.sa import isvar
+from online import get_score_multilex
+
MAXSCORE = 99
def EgivenF(ctx): # p(e|f) = c(e, f)/c(f)
@@ -40,21 +42,6 @@ def CoherenceProb(ctx): # c(f) / sample c(f)
prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count)
return -math.log10(prob)
-# Not a feature, used for MaxLex
-# bilex get_score for multiple instances
-def get_lex_online(f, e, dir, bilex_list):
- num = 0
- denom = 0
- for bilex in bilex_list:
- if dir == 0:
- denom += bilex.f.get(f, 0)
- else:
- denom += bilex.e.get(e, 0)
- num += bilex.fe.get((f, e), 0)
- if (not num) or (not denom):
- return None
- return num / denom
-
def MaxLexEgivenF(ttable):
def MaxLexEgivenF(ctx):
fwords = ctx.fphrase.words
@@ -62,7 +49,7 @@ def MaxLexEgivenF(ttable):
maxOffScore = 0.0
for e in ctx.ephrase.words:
if ctx.online:
- maxScore = max(get_lex_online(f, e, 0, (ttable, ctx.online.bilex)) for f in fwords)
+ maxScore = max(get_score_multilex(f, e, 0, (ttable, ctx.online.bilex)) for f in fwords)
else:
maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
@@ -76,7 +63,7 @@ def MaxLexFgivenE(ttable):
maxOffScore = 0.0
for f in ctx.fphrase.words:
if ctx.online:
- maxScore = max(get_lex_online(f, e, 1, (ttable, ctx.online.bilex)) for e in ewords)
+ maxScore = max(get_score_multilex(f, e, 1, (ttable, ctx.online.bilex)) for e in ewords)
else:
maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
diff --git a/python/cdec/sa/online.py b/python/cdec/sa/online.py
index d3f967e8..98c3459b 100644
--- a/python/cdec/sa/online.py
+++ b/python/cdec/sa/online.py
@@ -126,3 +126,17 @@ class Bilex:
break
(f, e, c) = line.split()
self.fe[(f, e)] = float(c)
+
+# Bilex get_score for multiple instances
+def get_score_multilex(f, e, dir, bilex_list):
+ num = 0
+ denom = 0
+ for bilex in bilex_list:
+ if dir == 0:
+ denom += bilex.f.get(f, 0)
+ else:
+ denom += bilex.e.get(e, 0)
+ num += bilex.fe.get((f, e), 0)
+ if (not num) or (not denom):
+ return None
+ return num / denom