diff options
Diffstat (limited to 'python/pkg/cdec')
| -rw-r--r-- | python/pkg/cdec/sa/extract.py | 1 | ||||
| -rw-r--r-- | python/pkg/cdec/sa/extractor.py | 10 | ||||
| -rw-r--r-- | python/pkg/cdec/sa/features.py | 8 | 
3 files changed, 10 insertions, 9 deletions
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py index 20eab9dd..2e596bd3 100644 --- a/python/pkg/cdec/sa/extract.py +++ b/python/pkg/cdec/sa/extract.py @@ -53,7 +53,6 @@ def extract(inp):      # Add training instance _after_ extracting grammars      if online:          extractor.add_instance(sentence, reference, alignment) -        #extractor.dump_online_stats()      grammar_file = os.path.abspath(grammar_file)      return '<seg grammar="{0}" id="{1}"> {2} </seg>{3}'.format(grammar_file, i, sentence, suffix) diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py index 5ef8041c..bb552c49 100644 --- a/python/pkg/cdec/sa/extractor.py +++ b/python/pkg/cdec/sa/extractor.py @@ -60,9 +60,9 @@ class GrammarExtractor:          # TODO: clean this up          extended_features = [] -        #extended_features.append(IsSupportedOnline) -        if online: -            extended_features.append(IsSupportedOnline) +        extended_features.append(IsSupportedOnline) +        #if online: +        #    extended_features.append(IsSupportedOnline)          # TODO: use @cdec.sa.features decorator for standard features too          # + add a mask to disable features @@ -101,4 +101,6 @@ class GrammarExtractor:      # Debugging      def dump_online_stats(self): -        self.factory.dump_online_stats()
\ No newline at end of file +        self.factory.dump_online_stats() +    def dump_online_rules(self): +        self.factory.dump_online_rules()
\ No newline at end of file diff --git a/python/pkg/cdec/sa/features.py b/python/pkg/cdec/sa/features.py index cede5304..49064f73 100644 --- a/python/pkg/cdec/sa/features.py +++ b/python/pkg/cdec/sa/features.py @@ -21,21 +21,21 @@ def SampleCountF(ctx): # sample c(f)      if not ctx.online:          count = 1 + ctx.fsample_count      else: -        count = 1 + ctx.fsample_count + ctx.online.fcount +        count = 1 + ctx.fsample_count + ctx.online.fsample_count      return math.log10(count)  def EgivenFCoherent(ctx): # c(e, f) / sample c(f)      if not ctx.online:          prob = ctx.paircount/ctx.fsample_count      else: -        prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fcount) +        prob = (ctx.paircount + ctx.online.paircount) / (ctx.fsample_count + ctx.online.fsample_count)      return -math.log10(prob) if prob > 0 else MAXSCORE  def CoherenceProb(ctx): # c(f) / sample c(f)      if not ctx.online:          prob = ctx.fcount/ctx.fsample_count      else: -        prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fcount) +        prob = (ctx.fcount + ctx.online.fcount) / (ctx.fsample_count + ctx.online.fsample_count)      return -math.log10(prob)  def MaxLexEgivenF(ttable): @@ -95,7 +95,7 @@ def IsFEGreaterThanZero(ctx):          count = ctx.paircount + ctx.online.paircount      return (ctx.paircount > 0.01) -def IsSupportedOnline(ctx): +def IsSupportedOnline(ctx): # Occurs in online data?      if ctx.online:          return (ctx.online.fcount > 0.01)      else:  | 
