summaryrefslogtreecommitdiff
path: root/python/pkg/cdec/sa/features.py
diff options
context:
space:
mode:
authorMichael Denkowski <michael.j.denkowski@gmail.com>2013-01-24 16:55:21 -0500
committerMichael Denkowski <michael.j.denkowski@gmail.com>2013-01-24 16:55:21 -0500
commitab25e1dc737ca49b8ec13a8f48d18ba961d24342 (patch)
tree0139640f93cf660f57b99fad0c910ebf8334bcea /python/pkg/cdec/sa/features.py
parent567b25a4c6e883a88b3d79d671577425769e1143 (diff)
Scored grammars from online extraction. Don't trust them yet.
Diffstat (limited to 'python/pkg/cdec/sa/features.py')
-rw-r--r--python/pkg/cdec/sa/features.py61
1 files changed, 53 insertions, 8 deletions
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py
index a4ae23e8..cede5304 100644
--- a/python/pkg/cdec/sa/features.py
+++ b/python/pkg/cdec/sa/features.py
@@ -4,20 +4,39 @@ import math
MAXSCORE = 99
def EgivenF(ctx): # p(e|f) = c(e, f)/c(f)
- return -math.log10(ctx.paircount/ctx.fcount)
+ if not ctx.online:
+ prob = ctx.paircount/ctx.fcount
+ else:
+ prob = (ctx.paircount + ctx.online.paircount) / (ctx.fcount + ctx.online.fcount)
+ return -math.log10(prob)
def CountEF(ctx): # c(e, f)
- return math.log10(1 + ctx.paircount)
+ if not ctx.online:
+ count = 1 + ctx.paircount
+ else:
+ count = 1 + ctx.paircount + ctx.online.paircount
+ return math.log10(count)
def SampleCountF(ctx): # sample c(f)
- return math.log10(1 + ctx.fsample_count)
+ if not ctx.online:
+ count = 1 + ctx.fsample_count
+ else:
+ count = 1 + ctx.fsample_count + ctx.online.fcount
+ return math.log10(count)
def EgivenFCoherent(ctx): # c(e, f) / sample c(f)
- prob = ctx.paircount/ctx.fsample_count
+ if not ctx.online:
+ prob = ctx.paircount/ctx.fsample_count
+ else:
+ prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fcount)
return -math.log10(prob) if prob > 0 else MAXSCORE
def CoherenceProb(ctx): # c(f) / sample c(f)
- return -math.log10(ctx.fcount/ctx.fsample_count)
+ if not ctx.online:
+ prob = ctx.fcount/ctx.fsample_count
+ else:
+ prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fcount)
+ return -math.log10(prob)
def MaxLexEgivenF(ttable):
def MaxLexEgivenF(ctx):
@@ -42,16 +61,42 @@ def MaxLexFgivenE(ttable):
return MaxLexFgivenE
def IsSingletonF(ctx):
- return (ctx.fcount == 1)
+ if not ctx.online:
+ count = ctx.fcount
+ else:
+ count = ctx.fcount + ctx.online.fcount
+ return (count == 1)
def IsSingletonFE(ctx):
- return (ctx.paircount == 1)
+ if not ctx.online:
+ count = ctx.paircount
+ else:
+ count = ctx.paircount + ctx.online.paircount
+ return (count == 1)
def IsNotSingletonF(ctx):
- return (ctx.fcount > 1)
+ if not ctx.online:
+ count = ctx.fcount
+ else:
+ count = ctx.fcount + ctx.online.fcount
+ return (count > 1)
def IsNotSingletonFE(ctx):
+ if not ctx.online:
+ count = ctx.paircount
+ else:
+ count = ctx.paircount + ctx.online.paircount
return (ctx.paircount > 1)
def IsFEGreaterThanZero(ctx):
+ if not ctx.online:
+ count = ctx.paircount
+ else:
+ count = ctx.paircount + ctx.online.paircount
return (ctx.paircount > 0.01)
+
+def IsSupportedOnline(ctx):
+ if ctx.online:
+ return (ctx.online.fcount > 0.01)
+ else:
+ return False \ No newline at end of file