diff options
Diffstat (limited to 'python/cdec')
-rw-r--r-- | python/cdec/scfg/extractor.py | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/python/cdec/scfg/extractor.py b/python/cdec/scfg/extractor.py index 9f1e1137..0a45ddb8 100644 --- a/python/cdec/scfg/extractor.py +++ b/python/cdec/scfg/extractor.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python import StringIO +from itertools import chain import clex import rulefactory @@ -9,12 +9,12 @@ import cdat import sym import log -log.level = -1 - from features import EgivenFCoherent, SampleCountF, CountEF,\ MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE from features import contextless +log.level = -1 + class Output(StringIO.StringIO): def close(self): pass @@ -22,8 +22,6 @@ class Output(StringIO.StringIO): def __str__(self): return self.getvalue() -from itertools import chain - def get_cn(sentence): sentence = chain(('<s>',), sentence.split(), ('</s>',)) sentence = (sym.fromstring(word, terminal=True) for word in sentence) @@ -93,9 +91,11 @@ class GrammarExtractor: self.models = tuple(contextless(feature) for feature in self.models) def grammar(self, sentence): + if isinstance(sentence, unicode): + sentence = sentence.encode('utf8') out = Output() cn = get_cn(sentence) - self.factory.input_file(cn, out) + self.factory.input(cn, output=out) return str(out) def main(config): |