From 1014d39fa347ec51dd2e588bae16b8692e188382 Mon Sep 17 00:00:00 2001 From: mjdenkowski Date: Thu, 10 Apr 2014 16:42:50 -0400 Subject: New feature: working implementation (online bilex) --- python/cdec/sa/features.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'python/cdec/sa/features.py') diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py index 1779f2f9..e19a18c0 100644 --- a/python/cdec/sa/features.py +++ b/python/cdec/sa/features.py @@ -40,13 +40,31 @@ def CoherenceProb(ctx): # c(f) / sample c(f) prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count) return -math.log10(prob) +# Not a feature, used for MaxLex +# bilex get_score for multiple instances +def get_lex_online(f, e, dir, bilex_list): + num = 0 + denom = 0 + for bilex in bilex_list: + if dir == 0: + denom += bilex.f.get(f, 0) + else: + denom += bilex.e.get(e, 0) + num += bilex.fe.get((f, e), 0) + if (not num) or (not denom): + return None + return num / denom + def MaxLexEgivenF(ttable): def MaxLexEgivenF(ctx): fwords = ctx.fphrase.words fwords.append('NULL') maxOffScore = 0.0 for e in ctx.ephrase.words: - maxScore = max(ttable.get_score(f, e, 0) for f in fwords) + if ctx.online: + maxScore = max(get_lex_online(f, e, 0, (ttable, ctx.online.bilex)) for f in fwords) + else: + maxScore = max(ttable.get_score(f, e, 0) for f in fwords) maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE return maxOffScore return MaxLexEgivenF @@ -57,7 +75,10 @@ def MaxLexFgivenE(ttable): ewords.append('NULL') maxOffScore = 0.0 for f in ctx.fphrase.words: - maxScore = max(ttable.get_score(f, e, 1) for e in ewords) + if ctx.online: + maxScore = max(get_lex_online(f, e, 1, (ttable, ctx.online.bilex)) for e in ewords) + else: + maxScore = max(ttable.get_score(f, e, 1) for e in ewords) maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE return maxOffScore return MaxLexFgivenE -- cgit v1.2.3 From 659ea32efb9ad0c1d8ad0d1dc4ead67be9859e6b Mon Sep 17 00:00:00 2001 From: mjdenkowski Date: Thu, 10 Apr 2014 16:58:46 -0400 Subject: Refactoring --- python/cdec/sa/_sa.cpp | 2 +- python/cdec/sa/features.py | 21 ++++----------------- python/cdec/sa/online.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 18 deletions(-) (limited to 'python/cdec/sa/features.py') diff --git a/python/cdec/sa/_sa.cpp b/python/cdec/sa/_sa.cpp index 652261fe..bbea8c9c 100644 --- a/python/cdec/sa/_sa.cpp +++ b/python/cdec/sa/_sa.cpp @@ -1,4 +1,4 @@ -/* Generated by Cython 0.20.1 on Thu Apr 10 16:38:02 2014 */ +/* Generated by Cython 0.20.1 on Thu Apr 10 16:55:21 2014 */ #define PY_SSIZE_T_CLEAN #ifndef CYTHON_USE_PYLONG_INTERNALS diff --git a/python/cdec/sa/features.py b/python/cdec/sa/features.py index e19a18c0..92e23889 100644 --- a/python/cdec/sa/features.py +++ b/python/cdec/sa/features.py @@ -3,6 +3,8 @@ import math from cdec.sa import isvar +from online import get_score_multilex + MAXSCORE = 99 def EgivenF(ctx): # p(e|f) = c(e, f)/c(f) @@ -40,21 +42,6 @@ def CoherenceProb(ctx): # c(f) / sample c(f) prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count) return -math.log10(prob) -# Not a feature, used for MaxLex -# bilex get_score for multiple instances -def get_lex_online(f, e, dir, bilex_list): - num = 0 - denom = 0 - for bilex in bilex_list: - if dir == 0: - denom += bilex.f.get(f, 0) - else: - denom += bilex.e.get(e, 0) - num += bilex.fe.get((f, e), 0) - if (not num) or (not denom): - return None - return num / denom - def MaxLexEgivenF(ttable): def MaxLexEgivenF(ctx): fwords = ctx.fphrase.words @@ -62,7 +49,7 @@ def MaxLexEgivenF(ttable): maxOffScore = 0.0 for e in ctx.ephrase.words: if ctx.online: - maxScore = max(get_lex_online(f, e, 0, (ttable, ctx.online.bilex)) for f in fwords) + maxScore = max(get_score_multilex(f, e, 0, (ttable, ctx.online.bilex)) for f in fwords) else: maxScore = max(ttable.get_score(f, e, 0) for f in fwords) maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE @@ -76,7 +63,7 @@ def MaxLexFgivenE(ttable): maxOffScore = 0.0 for f in ctx.fphrase.words: if ctx.online: - maxScore = max(get_lex_online(f, e, 1, (ttable, ctx.online.bilex)) for e in ewords) + maxScore = max(get_score_multilex(f, e, 1, (ttable, ctx.online.bilex)) for e in ewords) else: maxScore = max(ttable.get_score(f, e, 1) for e in ewords) maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE diff --git a/python/cdec/sa/online.py b/python/cdec/sa/online.py index d3f967e8..98c3459b 100644 --- a/python/cdec/sa/online.py +++ b/python/cdec/sa/online.py @@ -126,3 +126,17 @@ class Bilex: break (f, e, c) = line.split() self.fe[(f, e)] = float(c) + +# Bilex get_score for multiple instances +def get_score_multilex(f, e, dir, bilex_list): + num = 0 + denom = 0 + for bilex in bilex_list: + if dir == 0: + denom += bilex.f.get(f, 0) + else: + denom += bilex.e.get(e, 0) + num += bilex.fe.get((f, e), 0) + if (not num) or (not denom): + return None + return num / denom -- cgit v1.2.3