From 5e7a99f9ce09a31092e194c06dd51368e18b3aed Mon Sep 17 00:00:00 2001 From: Michael Denkowski Date: Sat, 26 Jan 2013 21:12:25 -0500 Subject: Online grammars now diff with incremental suffix array (except lex, TODO) --- python/pkg/cdec/sa/extract.py | 1 - python/pkg/cdec/sa/extractor.py | 10 ++++++---- python/pkg/cdec/sa/features.py | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'python/pkg/cdec') diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py index 20eab9dd..2e596bd3 100644 --- a/python/pkg/cdec/sa/extract.py +++ b/python/pkg/cdec/sa/extract.py @@ -53,7 +53,6 @@ def extract(inp): # Add training instance _after_ extracting grammars if online: extractor.add_instance(sentence, reference, alignment) - #extractor.dump_online_stats() grammar_file = os.path.abspath(grammar_file) return ' {2} {3}'.format(grammar_file, i, sentence, suffix) diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py index 5ef8041c..bb552c49 100644 --- a/python/pkg/cdec/sa/extractor.py +++ b/python/pkg/cdec/sa/extractor.py @@ -60,9 +60,9 @@ class GrammarExtractor: # TODO: clean this up extended_features = [] - #extended_features.append(IsSupportedOnline) - if online: - extended_features.append(IsSupportedOnline) + extended_features.append(IsSupportedOnline) + #if online: + # extended_features.append(IsSupportedOnline) # TODO: use @cdec.sa.features decorator for standard features too # + add a mask to disable features @@ -101,4 +101,6 @@ class GrammarExtractor: # Debugging def dump_online_stats(self): - self.factory.dump_online_stats() \ No newline at end of file + self.factory.dump_online_stats() + def dump_online_rules(self): + self.factory.dump_online_rules() \ No newline at end of file diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py index cede5304..49064f73 100644 --- a/python/pkg/cdec/sa/features.py +++ b/python/pkg/cdec/sa/features.py @@ -21,21 +21,21 @@ def SampleCountF(ctx): # sample c(f) if not ctx.online: count = 1 + ctx.fsample_count else: - count = 1 + ctx.fsample_count + ctx.online.fcount + count = 1 + ctx.fsample_count + ctx.online.fsample_count return math.log10(count) def EgivenFCoherent(ctx): # c(e, f) / sample c(f) if not ctx.online: prob = ctx.paircount/ctx.fsample_count else: - prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fcount) + prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fsample_count) return -math.log10(prob) if prob > 0 else MAXSCORE def CoherenceProb(ctx): # c(f) / sample c(f) if not ctx.online: prob = ctx.fcount/ctx.fsample_count else: - prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fcount) + prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count) return -math.log10(prob) def MaxLexEgivenF(ttable): @@ -95,7 +95,7 @@ def IsFEGreaterThanZero(ctx): count = ctx.paircount + ctx.online.paircount return (ctx.paircount > 0.01) -def IsSupportedOnline(ctx): +def IsSupportedOnline(ctx): # Occurs in online data? if ctx.online: return (ctx.online.fcount > 0.01) else: -- cgit v1.2.3