diff options
author | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-06-21 17:27:48 -0400 |
---|---|---|
committer | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-06-21 17:27:48 -0400 |
commit | 899f78c7c4c3a8cff97494665ed52ddb3460d44a (patch) | |
tree | 719a46d4b832572eb6e4501328b13057a1295a17 /python/cdec | |
parent | f3fef50a89e8e88da39e3c7000310c9e319d5cfc (diff) |
Allow SA rule extraction to write to a python buffer
+ very small sa-extract cleanup
Diffstat (limited to 'python/cdec')
-rw-r--r-- | python/cdec/scfg/extractor.py | 12 |
1 file changed, 6 insertions, 6 deletions
```diff
diff --git a/python/cdec/scfg/extractor.py b/python/cdec/scfg/extractor.py
index 9f1e1137..0a45ddb8 100644
--- a/python/cdec/scfg/extractor.py
+++ b/python/cdec/scfg/extractor.py
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
 import StringIO
+from itertools import chain
 
 import clex
 import rulefactory
@@ -9,12 +9,12 @@ import cdat
 import sym
 import log
 
-log.level = -1
-
 from features import EgivenFCoherent, SampleCountF, CountEF,\
     MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE
 from features import contextless
 
+log.level = -1
+
 class Output(StringIO.StringIO):
     def close(self):
         pass
@@ -22,8 +22,6 @@ class Output(StringIO.StringIO):
     def __str__(self):
         return self.getvalue()
 
-from itertools import chain
-
 def get_cn(sentence):
     sentence = chain(('<s>',), sentence.split(), ('</s>',))
     sentence = (sym.fromstring(word, terminal=True) for word in sentence)
@@ -93,9 +91,11 @@ class GrammarExtractor:
         self.models = tuple(contextless(feature) for feature in self.models)
 
     def grammar(self, sentence):
+        if isinstance(sentence, unicode):
+            sentence = sentence.encode('utf8')
         out = Output()
         cn = get_cn(sentence)
-        self.factory.input_file(cn, out)
+        self.factory.input(cn, output=out)
         return str(out)
 
 def main(config):
```