diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-07-28 12:11:44 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-07-28 12:11:44 -0400 |
commit | 306e0ba4754c6c4f460536cfe8c3f118dc1cc175 (patch) | |
tree | ad5ea3b0a5370ac613d1bad715fe0f5ab8c91c11 /python/pkg/cdec/sa/features.py | |
parent | 934e55dc12c3f374684bc6a0797e6f85c7abb85a (diff) | |
parent | ee5e376e263d9aeabdeee6968b4457f53d3fc772 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'python/pkg/cdec/sa/features.py')
-rw-r--r-- | python/pkg/cdec/sa/features.py | 57 |
1 files changed, 57 insertions, 0 deletions
# python/pkg/cdec/sa/features.py
#
# Feature functions for scoring a (source phrase, target phrase) pair.
#
# Every plain feature takes the same five positional arguments:
#   fphrase        source-side phrase object (only `.words` is read here)
#   ephrase        target-side phrase object (only `.words` is read here)
#   paircount      count associated with the (fphrase, ephrase) pair
#   fcount         count associated with fphrase
#   fsample_count  sample count associated with fphrase
# MaxLexEgivenF / MaxLexFgivenE are factories: they take a translation table
# and return a feature with that same five-argument signature.
from __future__ import division
import math

# Score returned in place of -log10(p) when p is not strictly positive.
MAXSCORE = 99


def _neg_log10(prob):
    """Return -log10(prob), or MAXSCORE when prob is not strictly positive."""
    return -math.log10(prob) if prob > 0 else MAXSCORE


def EgivenF(fphrase, ephrase, paircount, fcount, fsample_count):  # p(e|f)
    """Return -log10(paircount / fcount); MAXSCORE if the ratio is not > 0.

    A zero ``fcount`` still raises ZeroDivisionError.
    """
    return _neg_log10(paircount / fcount)


def CountEF(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return log10(1 + paircount)."""
    return math.log10(1 + paircount)


def SampleCountF(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return log10(1 + fsample_count)."""
    return math.log10(1 + fsample_count)


def EgivenFCoherent(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return -log10(paircount / fsample_count); MAXSCORE if not > 0.

    A zero ``fsample_count`` still raises ZeroDivisionError.
    """
    return _neg_log10(paircount / fsample_count)


def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return -log10(fcount / fsample_count); MAXSCORE if not > 0.

    A zero ``fsample_count`` still raises ZeroDivisionError.
    """
    return _neg_log10(fcount / fsample_count)


def MaxLexEgivenF(ttable):
    """Build a lexical feature backed by ``ttable.get_score(f, e, 0)``.

    For each word e of ``ephrase.words`` the feature takes the maximum table
    score over the words of ``fphrase.words`` plus the token 'NULL', converts
    it with -log10 (MAXSCORE when the maximum is not > 0), and sums the
    per-word results.
    NOTE(review): the meaning of the third get_score argument (0) is defined
    by the table implementation, which is not visible here -- confirm.
    """
    def feature(fphrase, ephrase, paircount, fcount, fsample_count):
        # Copy before adding NULL so the phrase's own word list is never
        # mutated (appending in place would grow it on every call).
        fwords = list(fphrase.words) + ['NULL']
        return sum(
            _neg_log10(max(ttable.get_score(f, e, 0) for f in fwords))
            for e in ephrase.words)
    return feature


def MaxLexFgivenE(ttable):
    """Build a lexical feature backed by ``ttable.get_score(f, e, 1)``.

    For each word f of ``fphrase.words`` the feature takes the maximum table
    score over the words of ``ephrase.words`` plus the token 'NULL', converts
    it with -log10 (MAXSCORE when the maximum is not > 0), and sums the
    per-word results.
    NOTE(review): the meaning of the third get_score argument (1) is defined
    by the table implementation, which is not visible here -- confirm.
    """
    def feature(fphrase, ephrase, paircount, fcount, fsample_count):
        # Copy before adding NULL so the phrase's own word list is never
        # mutated (appending in place would grow it on every call).
        ewords = list(ephrase.words) + ['NULL']
        return sum(
            _neg_log10(max(ttable.get_score(f, e, 1) for e in ewords))
            for f in fphrase.words)
    return feature


def IsSingletonF(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return True when fcount is exactly 1."""
    return (fcount == 1)


def IsSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return True when paircount is exactly 1."""
    return (paircount == 1)


def IsNotSingletonF(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return True when fcount is greater than 1."""
    return (fcount > 1)


def IsNotSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return True when paircount is greater than 1."""
    return (paircount > 1)


def IsFEGreaterThanZero(fphrase, ephrase, paircount, fcount, fsample_count):
    """Return True when paircount exceeds the 0.01 threshold."""
    return (paircount > 0.01)