From abcd6865a25944a1cd07c9224db2fd7a729f02e6 Mon Sep 17 00:00:00 2001 From: Michael Denkowski Date: Fri, 7 Mar 2014 01:52:09 -0800 Subject: More online bilex updates --- python/cdec/sa/features.py | 63 ++++++++-------------------------------------- 1 file changed, 10 insertions(+), 53 deletions(-) (limited to 'python/cdec/sa/features.py') diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py index 3e6af859..1779f2f9 100644 --- a/python/cdec/sa/features.py +++ b/python/cdec/sa/features.py @@ -44,60 +44,22 @@ def MaxLexEgivenF(ttable): def MaxLexEgivenF(ctx): fwords = ctx.fphrase.words fwords.append('NULL') - # Always use this for now - if not ctx.online or ctx.online: - maxOffScore = 0.0 - for e in ctx.ephrase.words: - maxScore = max(ttable.get_score(f, e, 0) for f in fwords) - maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return maxOffScore - else: - # For now, straight average - maxOffScore = 0.0 - maxOnScore = 0.0 - for e in ctx.ephrase.words: - maxScore = max(ttable.get_score(f, e, 0) for f in fwords) - maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE - for e in ctx.ephrase: - if not isvar(e): - maxScore = 0.0 - for f in ctx.fphrase: - if not isvar(f): - b_f = ctx.online.bilex_f.get(f, 0) - if b_f: - maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e)) - maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return (maxOffScore + maxOnScore) / 2 + maxOffScore = 0.0 + for e in ctx.ephrase.words: + maxScore = max(ttable.get_score(f, e, 0) for f in fwords) + maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return maxOffScore return MaxLexEgivenF def MaxLexFgivenE(ttable): def MaxLexFgivenE(ctx): ewords = ctx.ephrase.words ewords.append('NULL') - # Always use this for now - if not ctx.online or ctx.online: - maxOffScore = 0.0 - for f in ctx.fphrase.words: - maxScore = max(ttable.get_score(f, e, 1) for e in ewords) - maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return maxOffScore - else: - # For now, straight average - maxOffScore = 0.0 - maxOnScore = 0.0 - for f in ctx.fphrase.words: - maxScore = max(ttable.get_score(f, e, 1) for e in ewords) - maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE - for f in ctx.fphrase: - if not isvar(f): - maxScore = 0.0 - for e in ctx.ephrase: - if not isvar(e): - b_e = ctx.online.bilex_e.get(e, 0) - if b_e: - maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e, 0) / b_e ) - maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return (maxOffScore + maxOnScore) / 2 + maxOffScore = 0.0 + for f in ctx.fphrase.words: + maxScore = max(ttable.get_score(f, e, 1) for e in ewords) + maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE + return maxOffScore return MaxLexFgivenE def IsSingletonF(ctx): @@ -140,8 +102,3 @@ def IsSupportedOnline(ctx): # Occurs in online data? return (ctx.online.paircount > 0.01) else: return False - -def CountExceptLM(vocab): - def CountExceptLM(ctx): # Word count in bitext (inc online data) but NOT mono text - return sum(1 for e in ctx.ephrase.words if e not in vocab) - return CountExceptLM -- cgit v1.2.3