summary | refs | log | tree | commit | diff
path: root/python/pkg/cdec/sa
diff options
context:
space:
mode:
author Michael Denkowski <michael.j.denkowski@gmail.com> 2013-01-26 21:12:25 -0500
committer Michael Denkowski <michael.j.denkowski@gmail.com> 2013-01-26 21:12:25 -0500
commit 0a6dbb8aefb1662a68f3f14f0c42a72150d8be03 (patch)
tree 5b79d719f4f9e2f37ef73cc3d278ec6667c2b47b /python/pkg/cdec/sa
parent ca3da3a815b6e85531d6ded07e7d6bec7852748c (diff)
Online grammars now diff with incremental suffix array (except lex, TODO)
Diffstat (limited to 'python/pkg/cdec/sa')
-rw-r--r-- python/pkg/cdec/sa/extract.py 1
-rw-r--r-- python/pkg/cdec/sa/extractor.py 10
-rw-r--r-- python/pkg/cdec/sa/features.py 8
3 files changed, 10 insertions, 9 deletions
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index 20eab9dd..2e596bd3 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -53,7 +53,6 @@ def extract(inp):
# Add training instance _after_ extracting grammars
if online:
extractor.add_instance(sentence, reference, alignment)
- #extractor.dump_online_stats()
grammar_file = os.path.abspath(grammar_file)
return '<seg grammar="{0}" id="{1}"> {2} </seg>{3}'.format(grammar_file, i, sentence, suffix)
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index 5ef8041c..bb552c49 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -60,9 +60,9 @@ class GrammarExtractor:
# TODO: clean this up
extended_features = []
- #extended_features.append(IsSupportedOnline)
- if online:
- extended_features.append(IsSupportedOnline)
+ extended_features.append(IsSupportedOnline)
+ #if online:
+ # extended_features.append(IsSupportedOnline)
# TODO: use @cdec.sa.features decorator for standard features too
# + add a mask to disable features
@@ -101,4 +101,6 @@ class GrammarExtractor:
# Debugging
def dump_online_stats(self):
- self.factory.dump_online_stats() \ No newline at end of file
+ self.factory.dump_online_stats()
+ def dump_online_rules(self):
+ self.factory.dump_online_rules() \ No newline at end of file
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py
index cede5304..49064f73 100644
--- a/python/pkg/cdec/sa/features.py
+++ b/python/pkg/cdec/sa/features.py
@@ -21,21 +21,21 @@ def SampleCountF(ctx): # sample c(f)
if not ctx.online:
count = 1 + ctx.fsample_count
else:
- count = 1 + ctx.fsample_count + ctx.online.fcount
+ count = 1 + ctx.fsample_count + ctx.online.fsample_count
return math.log10(count)
def EgivenFCoherent(ctx): # c(e, f) / sample c(f)
if not ctx.online:
prob = ctx.paircount/ctx.fsample_count
else:
- prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fcount)
+ prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fsample_count)
return -math.log10(prob) if prob > 0 else MAXSCORE
def CoherenceProb(ctx): # c(f) / sample c(f)
if not ctx.online:
prob = ctx.fcount/ctx.fsample_count
else:
- prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fcount)
+ prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count)
return -math.log10(prob)
def MaxLexEgivenF(ttable):
@@ -95,7 +95,7 @@ def IsFEGreaterThanZero(ctx):
count = ctx.paircount + ctx.online.paircount
return (ctx.paircount > 0.01)
-def IsSupportedOnline(ctx):
+def IsSupportedOnline(ctx): # Occurs in online data?
if ctx.online:
return (ctx.online.fcount > 0.01)
else: