diff options
author | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-07-27 01:16:03 -0400 |
---|---|---|
committer | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-07-27 01:16:03 -0400 |
commit | b2a8bccb2bd713d9ec081cf3dad0162c2cb492d8 (patch) | |
tree | c661044fd2a3943cf2ad12109b916fd7b56a519e /python/cdec/scfg/features.py | |
parent | 148b1168c2b07abf0c7757a31141377c28ec3d91 (diff) |
[python] Fork of the suffix-array extractor with surface improvements
Available as the cdec.sa module, with commande-line helpers:
python -m cdec.sa.compile -f ... -e ... -a ... -o sa-out/ -c extract.ini
python -m cdec.sa.extract -c extract.ini -g grammars-out/ < input.txt > input.sgml
+ renamed cdec.scfg -> cdec.sa
+ Python README
Diffstat (limited to 'python/cdec/scfg/features.py')
-rw-r--r-- | python/cdec/scfg/features.py | 62 |
1 files changed, 0 insertions, 62 deletions
diff --git a/python/cdec/scfg/features.py b/python/cdec/scfg/features.py deleted file mode 100644 index 6419cdd8..00000000 --- a/python/cdec/scfg/features.py +++ /dev/null @@ -1,62 +0,0 @@ -from __future__ import division -import math -import sym - -def contextless(feature): - feature.compute_contextless_score = feature - return feature - -MAXSCORE = 99 - -def EgivenF(fphrase, ephrase, paircount, fcount, fsample_count): # p(e|f) - return -math.log10(paircount/fcount) - -def CountEF(fphrase, ephrase, paircount, fcount, fsample_count): - return math.log10(1 + paircount) - -def SampleCountF(fphrase, ephrase, paircount, fcount, fsample_count): - return math.log10(1 + fsample_count) - -def EgivenFCoherent(fphrase, ephrase, paircount, fcount, fsample_count): - prob = paircount/fsample_count - return -math.log10(prob) if prob > 0 else MAXSCORE - -def CoherenceProb(fphrase, ephrase, paircount, fcount, fsample_count): - return -math.log10(fcount/fsample_count) - -def MaxLexEgivenF(ttable): - def feature(fphrase, ephrase, paircount, fcount, fsample_count): - fwords = [sym.tostring(w) for w in fphrase if not sym.isvar(w)] + ['NULL'] - ewords = (sym.tostring(w) for w in ephrase if not sym.isvar(w)) - def score(): - for e in ewords: - maxScore = max(ttable.get_score(f, e, 0) for f in fwords) - yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return sum(score()) - return feature - -def MaxLexFgivenE(ttable): - def feature(fphrase, ephrase, paircount, fcount, fsample_count): - fwords = (sym.tostring(w) for w in fphrase if not sym.isvar(w)) - ewords = [sym.tostring(w) for w in ephrase if not sym.isvar(w)] + ['NULL'] - def score(): - for f in fwords: - maxScore = max(ttable.get_score(f, e, 1) for e in ewords) - yield -math.log10(maxScore) if maxScore > 0 else MAXSCORE - return sum(score()) - return feature - -def IsSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): - return (fcount == 1) - -def IsSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): - return (paircount == 1) - -def IsNotSingletonF(fphrase, ephrase, paircount, fcount, fsample_count): - return (fcount > 1) - -def IsNotSingletonFE(fphrase, ephrase, paircount, fcount, fsample_count): - return (paircount > 1) - -def IsFEGreaterThanZero(fphrase, ephrase, paircount, fcount, fsample_count): - return (paircount > 0.01) |