summaryrefslogtreecommitdiff
path: root/python/pkg/cdec
diff options
context:
space:
mode:
Diffstat (limited to 'python/pkg/cdec')
-rw-r--r--python/pkg/cdec/sa/__init__.py2
-rw-r--r--python/pkg/cdec/sa/extractor.py5
-rw-r--r--python/pkg/cdec/sa/features.py44
3 files changed, 40 insertions, 11 deletions
diff --git a/python/pkg/cdec/sa/__init__.py b/python/pkg/cdec/sa/__init__.py
index 418531d9..14ba5ecb 100644
--- a/python/pkg/cdec/sa/__init__.py
+++ b/python/pkg/cdec/sa/__init__.py
@@ -1,5 +1,5 @@
from cdec.sa._sa import make_lattice, decode_lattice, decode_sentence,\
- encode_words, decode_words,\
+ encode_words, decode_words, isvar,\
SuffixArray, DataArray, LCP, Precomputation, Alignment, BiLex,\
HieroCachingRuleFactory, Sampler, Scorer
from cdec.sa.extractor import GrammarExtractor
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index bb552c49..cd3ab899 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -61,13 +61,14 @@ class GrammarExtractor:
# TODO: clean this up
extended_features = []
extended_features.append(IsSupportedOnline)
- #if online:
- # extended_features.append(IsSupportedOnline)
+ if online:
+ extended_features.append(IsSupportedOnline)
# TODO: use @cdec.sa.features decorator for standard features too
# + add a mask to disable features
for f in cdec.sa._SA_FEATURES:
extended_features.append(f)
+
scorer = cdec.sa.Scorer(EgivenFCoherent, SampleCountF, CountEF,
MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE,
*extended_features)
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py
index 49064f73..a89499d4 100644
--- a/python/pkg/cdec/sa/features.py
+++ b/python/pkg/cdec/sa/features.py
@@ -1,6 +1,8 @@
from __future__ import division
import math
+from cdec.sa import isvar
+
MAXSCORE = 99
def EgivenF(ctx): # p(e|f) = c(e, f)/c(f)
@@ -42,22 +44,48 @@ def MaxLexEgivenF(ttable):
def MaxLexEgivenF(ctx):
fwords = ctx.fphrase.words
fwords.append('NULL')
- def score():
+ if not ctx.online:
+ maxOffScore = 0.0
+ for e in ctx.ephrase.words:
+ maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+ maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ return maxOffScore
+ else:
+ # For now, straight average
+ maxOffScore = 0.0
+ maxOnScore = 0.0
for e in ctx.ephrase.words:
- maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
- yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
- return sum(score())
+ maxScore = max(ttable.get_score(f, e, 0) for f in fwords)
+ maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ for e in ctx.ephrase:
+ if not isvar(e):
+ maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_f[f]) for f in ctx.fphrase if not isvar(f))
+ maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ return (maxOffScore + maxOnScore) / 2
return MaxLexEgivenF
def MaxLexFgivenE(ttable):
def MaxLexFgivenE(ctx):
ewords = ctx.ephrase.words
ewords.append('NULL')
- def score():
+ if not ctx.online:
+ maxOffScore = 0.0
+ for f in ctx.fphrase.words:
+ maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+ maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ return maxOffScore
+ else:
+ # For now, straight average
+ maxOffScore = 0.0
+ maxOnScore = 0.0
for f in ctx.fphrase.words:
- maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
- yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE
- return sum(score())
+ maxScore = max(ttable.get_score(f, e, 1) for e in ewords)
+ maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ for f in ctx.fphrase:
+ if not isvar(f):
+ maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_e[e]) for e in ctx.ephrase if not isvar(e))
+ maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE
+ return (maxOffScore + maxOnScore) / 2
return MaxLexFgivenE
def IsSingletonF(ctx):