from __future__ import division import math import cdec.sa MAXSCORE = 99 def EgivenF(fphrase, ephrase, paircount, fcount, fsample_count): # p(e|f) return -math.log10(paircount/fcount) def CountEF(fphrase, ephrase, paircount, fcount, fsample_count): return math.log10(1 + paircount) def SampleCountF(fphrase, ephrase, paircount, fcount, fsample_count): return math.log10(1 + fsample_count) def EgivenFCoherent(fphrase, ephrase, paircount, fcount, fsample_count): prob = paircount/fsample_count return -math.log10(prob) if prob > 0 else MAXSCORE def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count): return -math.log10(fcount/fsample_count) def MaxLexEgivenF(ttable): def feature(fphrase, ephrase, paircount, fcount, fsample_count): fwords = [cdec.sa.sym_tostring(w) for w in fphrase if not cdec.sa.sym_isvar(w)] fwords.append('NULL') ewords = (cdec.sa.sym_tostring(w) for w in ephrase if not cdec.sa.sym_isvar(w)) def score(): for e in ewords: maxScore = max(ttable.get_score(f, e, 0) for f in fwords) yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE return sum(score()) return feature def MaxLexFgivenE(ttable): def feature(fphrase, ephrase, paircount, fcount, fsample_count): fwords = (cdec.sa.sym_tostring(w) for w in fphrase if not cdec.sa.sym_isvar(w)) ewords = [cdec.sa.sym_tostring(w) for w in ephrase if not cdec.sa.sym_isvar(w)] ewords.append('NULL') def score(): for f in fwords: maxScore = max(ttable.get_score(f, e, 1) for e in ewords) yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE return sum(score()) return feature def IsSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): return (fcount == 1) def IsSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): return (paircount == 1) def IsNotSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): return (fcount > 1) def IsNotSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): return (paircount > 1) def IsFEGreaterThanZero(fphrase, ephrase, paircount, fcount, fsample_count): return (paircount > 0.01)