diff options
author | Michael Denkowski <mdenkows@cs.cmu.edu> | 2014-03-07 01:52:09 -0800 |
---|---|---|
committer | Michael Denkowski <mdenkows@cs.cmu.edu> | 2014-03-07 01:52:09 -0800 |
commit | abcd6865a25944a1cd07c9224db2fd7a729f02e6 (patch) | |
tree | 5fd9e5298bd55f91a497c5bd46c35a3414aa5c5c /python/cdec/sa/features.py | |
parent | a49f3a5b19547e7e46a652b22fab601da8fc210f (diff) |
More online bilex updates
Diffstat (limited to 'python/cdec/sa/features.py')
-rw-r--r-- | python/cdec/sa/features.py | 63 |
1 files changed, 10 insertions, 53 deletions
```diff
diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py
index 3e6af859..1779f2f9 100644
--- a/python/cdec/sa/features.py
+++ b/python/cdec/sa/features.py
@@ -44,60 +44,22 @@ def MaxLexEgivenF(ttable):
     def MaxLexEgivenF(ctx):
         fwords = ctx.fphrase.words
         fwords.append('NULL')
-        # Always use this for now
-        if not ctx.online or ctx.online:
-            maxOffScore = 0.0
-            for e in ctx.ephrase.words:
-                maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return maxOffScore
-        else:
-            # For now, straight average
-            maxOffScore = 0.0
-            maxOnScore = 0.0
-            for e in ctx.ephrase.words:
-                maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            for e in ctx.ephrase:
-                if not isvar(e):
-                    maxScore = 0.0
-                    for f in ctx.fphrase:
-                        if not isvar(f):
-                            b_f = ctx.online.bilex_f.get(f, 0)
-                            if b_f:
-                                maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e))
-                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return (maxOffScore + maxOnScore) / 2
+        maxOffScore = 0.0
+        for e in ctx.ephrase.words:
+            maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+            maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+        return maxOffScore
     return MaxLexEgivenF
 
 def MaxLexFgivenE(ttable):
     def MaxLexFgivenE(ctx):
         ewords = ctx.ephrase.words
         ewords.append('NULL')
-        # Always use this for now
-        if not ctx.online or ctx.online:
-            maxOffScore = 0.0
-            for f in ctx.fphrase.words:
-                maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return maxOffScore
-        else:
-            # For now, straight average
-            maxOffScore = 0.0
-            maxOnScore = 0.0
-            for f in ctx.fphrase.words:
-                maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
-                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            for f in ctx.fphrase:
-                if not isvar(f):
-                    maxScore = 0.0
-                    for e in ctx.ephrase:
-                        if not isvar(e):
-                            b_e = ctx.online.bilex_e.get(e, 0)
-                            if b_e:
-                                maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e, 0) / b_e )
-                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
-            return (maxOffScore + maxOnScore) / 2
+        maxOffScore = 0.0
+        for f in ctx.fphrase.words:
+            maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+            maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+        return maxOffScore
     return MaxLexFgivenE
 
 def IsSingletonF(ctx):
@@ -140,8 +102,3 @@ def IsSupportedOnline(ctx): # Occurs in online data?
         return (ctx.online.paircount > 0.01)
     else:
         return False
-
-def CountExceptLM(vocab):
-    def CountExceptLM(ctx): # Word count in bitext (inc online data) but NOT mono text
-        return sum(1 for e in ctx.ephrase.words if e not in vocab)
-    return CountExceptLM
```