From 899f78c7c4c3a8cff97494665ed52ddb3460d44a Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Thu, 21 Jun 2012 17:27:48 -0400 Subject: Allow SA rule extraction to write to a python buffer + very small sa-extract cleanup --- python/cdec/scfg/extractor.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'python') diff --git a/python/cdec/scfg/extractor.py b/python/cdec/scfg/extractor.py index 9f1e1137..0a45ddb8 100644 --- a/python/cdec/scfg/extractor.py +++ b/python/cdec/scfg/extractor.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python import StringIO +from itertools import chain import clex import rulefactory @@ -9,12 +9,12 @@ import cdat import sym import log -log.level = -1 - from features import EgivenFCoherent, SampleCountF, CountEF,\ MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE from features import contextless +log.level = -1 + class Output(StringIO.StringIO): def close(self): pass @@ -22,8 +22,6 @@ class Output(StringIO.StringIO): def __str__(self): return self.getvalue() -from itertools import chain - def get_cn(sentence): sentence = chain(('',), sentence.split(), ('',)) sentence = (sym.fromstring(word, terminal=True) for word in sentence) @@ -93,9 +91,11 @@ class GrammarExtractor: self.models = tuple(contextless(feature) for feature in self.models) def grammar(self, sentence): + if isinstance(sentence, unicode): + sentence = sentence.encode('utf8') out = Output() cn = get_cn(sentence) - self.factory.input_file(cn, out) + self.factory.input(cn, output=out) return str(out) def main(config): -- cgit v1.2.3