diff options
author | Michael Denkowski <michael.j.denkowski@gmail.com> | 2013-01-28 15:26:58 -0500 |
---|---|---|
committer | Michael Denkowski <michael.j.denkowski@gmail.com> | 2013-01-28 15:26:58 -0500 |
commit | b6fb7d74fe72c2a569b8c0448fc66ede6f1cfd67 (patch) | |
tree | a3315b05417b19b463b9e773576dcc0cfdeb619d | |
parent | 2dd5feffac709c2628a593d76eaff590f22f226c (diff) |
For now, don't use online bilex counts
-rw-r--r-- | python/pkg/cdec/sa/extractor.py | 1 | ||||
-rw-r--r-- | python/pkg/cdec/sa/features.py | 20 | ||||
-rw-r--r-- | python/src/sa/_sa.c | 2 |
3 files changed, 17 insertions, 6 deletions
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py index cd3ab899..acc13cbc 100644 --- a/python/pkg/cdec/sa/extractor.py +++ b/python/pkg/cdec/sa/extractor.py @@ -60,7 +60,6 @@ class GrammarExtractor: # TODO: clean this up extended_features = [] - extended_features.append(IsSupportedOnline) if online: extended_features.append(IsSupportedOnline) diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py index a89499d4..cbea5dd1 100644 --- a/python/pkg/cdec/sa/features.py +++ b/python/pkg/cdec/sa/features.py @@ -44,7 +44,8 @@ def MaxLexEgivenF(ttable): def MaxLexEgivenF(ctx): fwords = ctx.fphrase.words fwords.append('NULL') - if not ctx.online: + # Always use this for now + if not ctx.online or ctx.online: maxOffScore = 0.0 for e in ctx.ephrase.words: maxScore = max(ttable.get_score(f, e, 0) for f in fwords) @@ -59,7 +60,12 @@ def MaxLexEgivenF(ttable): maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE for e in ctx.ephrase: if not isvar(e): - maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_f[f]) for f in ctx.fphrase if not isvar(f)) + maxScore = 0.0 + for f in ctx.fphrase: + if not isvar(f): + b_f = ctx.online.bilex_f.get(f, 0) + if b_f: + maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e)) maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE return (maxOffScore + maxOnScore) / 2 return MaxLexEgivenF @@ -68,7 +74,8 @@ def MaxLexFgivenE(ttable): def MaxLexFgivenE(ctx): ewords = ctx.ephrase.words ewords.append('NULL') - if not ctx.online: + # Always use this for now + if not ctx.online or ctx.online: maxOffScore = 0.0 for f in ctx.fphrase.words: maxScore = max(ttable.get_score(f, e, 1) for e in ewords) @@ -83,7 +90,12 @@ def MaxLexFgivenE(ttable): maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE for f in ctx.fphrase: if not isvar(f): - maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_e[e]) for e in ctx.ephrase if not isvar(e)) + maxScore = 0.0 + for e in ctx.ephrase: + if not isvar(e): + b_e = ctx.online.bilex_e.get(e, 0) + if b_e: + maxScore = max(maxScore, ctx.online.bilex_fe.get(f, {}).get(e, 0) / b_e ) maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE return (maxOffScore + maxOnScore) / 2 return MaxLexFgivenE diff --git a/python/src/sa/_sa.c b/python/src/sa/_sa.c index 7d73b3b7..89445b45 100644 --- a/python/src/sa/_sa.c +++ b/python/src/sa/_sa.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17.1 on Mon Jan 28 11:56:59 2013 */ +/* Generated by Cython 0.17.1 on Mon Jan 28 14:28:01 2013 */ #define PY_SSIZE_T_CLEAN #include "Python.h" |