diff options
Diffstat (limited to 'python/pkg')
| -rw-r--r-- | python/pkg/cdec/sa/__init__.py | 2 | ||||
| -rw-r--r-- | python/pkg/cdec/sa/extractor.py | 5 | ||||
| -rw-r--r-- | python/pkg/cdec/sa/features.py | 44 | 
3 files changed, 40 insertions, 11 deletions
diff --git a/python/pkg/cdec/sa/__init__.py b/python/pkg/cdec/sa/__init__.py index 418531d9..14ba5ecb 100644 --- a/python/pkg/cdec/sa/__init__.py +++ b/python/pkg/cdec/sa/__init__.py @@ -1,5 +1,5 @@  from cdec.sa._sa import make_lattice, decode_lattice, decode_sentence,\ -        encode_words, decode_words,\ +        encode_words, decode_words, isvar,\          SuffixArray, DataArray, LCP, Precomputation, Alignment, BiLex,\          HieroCachingRuleFactory, Sampler, Scorer  from cdec.sa.extractor import GrammarExtractor diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py index bb552c49..cd3ab899 100644 --- a/python/pkg/cdec/sa/extractor.py +++ b/python/pkg/cdec/sa/extractor.py @@ -61,13 +61,14 @@ class GrammarExtractor:          # TODO: clean this up          extended_features = []          extended_features.append(IsSupportedOnline) -        #if online: -        #    extended_features.append(IsSupportedOnline) +        if online: +            extended_features.append(IsSupportedOnline)          # TODO: use @cdec.sa.features decorator for standard features too          # + add a mask to disable features          for f in cdec.sa._SA_FEATURES:              extended_features.append(f) +                      scorer = cdec.sa.Scorer(EgivenFCoherent, SampleCountF, CountEF,               MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE,              *extended_features) diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py index 49064f73..a89499d4 100644 --- a/python/pkg/cdec/sa/features.py +++ b/python/pkg/cdec/sa/features.py @@ -1,6 +1,8 @@  from __future__ import division  import math +from cdec.sa import isvar +  MAXSCORE = 99  def EgivenF(ctx): # p(e|f) = c(e, f)/c(f) @@ -42,22 +44,48 @@ def MaxLexEgivenF(ttable):      def MaxLexEgivenF(ctx):          fwords = ctx.fphrase.words          fwords.append('NULL') -        def score(): +        if not ctx.online: +            maxOffScore = 0.0 +            for e in ctx.ephrase.words: +                maxScore = max(ttable.get_score(f, e, 0) for f in fwords) +                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE +            return maxOffScore +        else: +            # For now, straight average +            maxOffScore = 0.0 +            maxOnScore = 0.0              for e in ctx.ephrase.words: -              maxScore = max(ttable.get_score(f, e, 0) for f in fwords) -              yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE -        return sum(score()) +                maxScore = max(ttable.get_score(f, e, 0) for f in fwords) +                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE +            for e in ctx.ephrase: +                if not isvar(e): +                    maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_f[f]) for f in ctx.fphrase if not isvar(f)) +                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE +            return (maxOffScore + maxOnScore) / 2      return MaxLexEgivenF  def MaxLexFgivenE(ttable):      def MaxLexFgivenE(ctx):          ewords = ctx.ephrase.words          ewords.append('NULL') -        def score(): +        if not ctx.online: +            maxOffScore = 0.0 +            for f in ctx.fphrase.words: +                maxScore = max(ttable.get_score(f, e, 1) for e in ewords) +                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE +            return maxOffScore +        else: +            # For now, straight average +            maxOffScore = 0.0 +            maxOnScore = 0.0              for f in ctx.fphrase.words: -              maxScore = max(ttable.get_score(f, e, 1) for e in ewords) -              yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE -        return sum(score()) +                maxScore = max(ttable.get_score(f, e, 1) for e in ewords) +                maxOffScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE +            for f in ctx.fphrase: +                if not isvar(f): +                    maxScore = max((ctx.online.bilex_fe[f][e] / ctx.online.bilex_e[e]) for e in ctx.ephrase if not isvar(e)) +                    maxOnScore += -math.log10(maxScore) if maxScore > 0 else MAXSCORE +            return (maxOffScore + maxOnScore) / 2      return MaxLexFgivenE  def IsSingletonF(ctx):  | 
