diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/cdec/sa/_sa.cpp | 2 | ||||
| -rw-r--r-- | python/cdec/sa/features.py | 21 | ||||
| -rw-r--r-- | python/cdec/sa/online.py | 14 | 
3 files changed, 19 insertions, 18 deletions
| diff --git a/python/cdec/sa/_sa.cpp b/python/cdec/sa/_sa.cpp index 652261fe..bbea8c9c 100644 --- a/python/cdec/sa/_sa.cpp +++ b/python/cdec/sa/_sa.cpp @@ -1,4 +1,4 @@ -/* Generated by Cython 0.20.1 on Thu Apr 10 16:38:02 2014 */ +/* Generated by Cython 0.20.1 on Thu Apr 10 16:55:21 2014 */  #define PY_SSIZE_T_CLEAN  #ifndef CYTHON_USE_PYLONG_INTERNALS diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py index e19a18c0..92e23889 100644 --- a/python/cdec/sa/features.py +++ b/python/cdec/sa/features.py @@ -3,6 +3,8 @@ import math  from cdec.sa import isvar +from online import get_score_multilex +  MAXSCORE = 99  def EgivenF(ctx): # p(e|f) = c(e, f)/c(f) @@ -40,21 +42,6 @@ def CoherenceProb(ctx): # c(f) / sample c(f)          prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count)      return -math.log10(prob) -# Not a feature, used for MaxLex -# bilex get_score for multiple instances -def get_lex_online(f, e, dir, bilex_list): -    num = 0 -    denom = 0 -    for bilex in bilex_list: -        if dir == 0: -            denom += bilex.f.get(f, 0) -        else: -            denom += bilex.e.get(e, 0) -        num += bilex.fe.get((f, e), 0) -    if (not num) or (not denom): -        return None -    return num / denom -  def MaxLexEgivenF(ttable):      def MaxLexEgivenF(ctx):          fwords = ctx.fphrase.words @@ -62,7 +49,7 @@ def MaxLexEgivenF(ttable):          maxOffScore = 0.0          for e in ctx.ephrase.words:              if ctx.online: -                maxScore = max(get_lex_online(f, e, 0, (ttable, ctx.online.bilex)) for f in fwords) +                maxScore = max(get_score_multilex(f, e, 0, (ttable, ctx.online.bilex)) for f in fwords)              else:                  maxScore = max(ttable.get_score(f, e, 0) for f in fwords)              maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE @@ -76,7 +63,7 @@ def MaxLexFgivenE(ttable):          maxOffScore = 0.0          for f in ctx.fphrase.words:              if ctx.online: -                maxScore = max(get_lex_online(f, e, 1, (ttable, ctx.online.bilex)) for e in ewords) +                maxScore = max(get_score_multilex(f, e, 1, (ttable, ctx.online.bilex)) for e in ewords)              else:                  maxScore = max(ttable.get_score(f, e, 1) for e in ewords)              maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE diff --git a/python/cdec/sa/online.py b/python/cdec/sa/online.py index d3f967e8..98c3459b 100644 --- a/python/cdec/sa/online.py +++ b/python/cdec/sa/online.py @@ -126,3 +126,17 @@ class Bilex:                      break                  (f, e, c) = line.split()                  self.fe[(f, e)] = float(c) + +# Bilex get_score for multiple instances +def get_score_multilex(f, e, dir, bilex_list): +    num = 0 +    denom = 0 +    for bilex in bilex_list: +        if dir == 0: +            denom += bilex.f.get(f, 0) +        else: +            denom += bilex.e.get(e, 0) +        num += bilex.fe.get((f, e), 0) +    if (not num) or (not denom): +        return None +    return num / denom | 
