diff options
author | Michael Denkowski <michael.j.denkowski@gmail.com> | 2013-01-26 21:12:25 -0500 |
---|---|---|
committer | Michael Denkowski <michael.j.denkowski@gmail.com> | 2013-01-26 21:12:25 -0500 |
commit | 5e7a99f9ce09a31092e194c06dd51368e18b3aed (patch) | |
tree | 4839ba53bdfdc04a23a47688e25812d1f543342a /python/pkg/cdec | |
parent | ab25e1dc737ca49b8ec13a8f48d18ba961d24342 (diff) |
Online grammars now diff with incremental suffix array (except lex, TODO)
Diffstat (limited to 'python/pkg/cdec')
-rw-r--r-- | python/pkg/cdec/sa/extract.py | 1 | ||||
-rw-r--r-- | python/pkg/cdec/sa/extractor.py | 10 | ||||
-rw-r--r-- | python/pkg/cdec/sa/features.py | 8 |
3 files changed, 10 insertions, 9 deletions
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index 20eab9dd..2e596bd3 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -53,7 +53,6 @@ def extract(inp):
     # Add training instance _after_ extracting grammars
     if online:
         extractor.add_instance(sentence, reference, alignment)
-        #extractor.dump_online_stats()
 
     grammar_file = os.path.abspath(grammar_file)
     return '<seg grammar="{0}" id="{1}"> {2} </seg>{3}'.format(grammar_file, i, sentence, suffix)
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index 5ef8041c..bb552c49 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -60,9 +60,9 @@ class GrammarExtractor:
 
         # TODO: clean this up
         extended_features = []
-        #extended_features.append(IsSupportedOnline)
-        if online:
-            extended_features.append(IsSupportedOnline)
+        extended_features.append(IsSupportedOnline)
+        #if online:
+        #    extended_features.append(IsSupportedOnline)
 
         # TODO: use @cdec.sa.features decorator for standard features too
         # + add a mask to disable features
@@ -101,4 +101,6 @@ class GrammarExtractor:
 
     # Debugging
     def dump_online_stats(self):
-        self.factory.dump_online_stats()
\ No newline at end of file
+        self.factory.dump_online_stats()
+    def dump_online_rules(self):
+        self.factory.dump_online_rules()
\ No newline at end of file
diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py
index cede5304..49064f73 100644
--- a/python/pkg/cdec/sa/features.py
+++ b/python/pkg/cdec/sa/features.py
@@ -21,21 +21,21 @@ def SampleCountF(ctx): # sample c(f)
     if not ctx.online:
         count = 1 + ctx.fsample_count
     else:
-        count = 1 + ctx.fsample_count + ctx.online.fcount
+        count = 1 + ctx.fsample_count + ctx.online.fsample_count
     return math.log10(count)
 
 def EgivenFCoherent(ctx): # c(e, f) / sample c(f)
     if not ctx.online:
         prob = ctx.paircount/ctx.fsample_count
     else:
-        prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fcount)
+        prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fsample_count)
     return -math.log10(prob) if prob > 0 else MAXSCORE
 
 def CoherenceProb(ctx): # c(f) / sample c(f)
     if not ctx.online:
         prob = ctx.fcount/ctx.fsample_count
     else:
-        prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fcount)
+        prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count)
     return -math.log10(prob)
 
 def MaxLexEgivenF(ttable):
@@ -95,7 +95,7 @@ def IsFEGreaterThanZero(ctx):
         count = ctx.paircount + ctx.online.paircount
     return (ctx.paircount > 0.01)
 
-def IsSupportedOnline(ctx):
+def IsSupportedOnline(ctx): # Occurs in online data?
     if ctx.online:
         return (ctx.online.fcount > 0.01)
     else: