summary | refs | log | tree | commit | diff
path: root/python/cdec
diff options
context:
space:
mode:
author: Victor Chahuneau <vchahune@cs.cmu.edu> 2012-06-21 17:27:48 -0400
committer: Victor Chahuneau <vchahune@cs.cmu.edu> 2012-06-21 17:27:48 -0400
commit: 37bb4a4c537ea2f9bd916f3e031c759e61bf49f7 (patch)
tree: b53f8696f230b9bd54032c0d2f2fedaecdba9899 /python/cdec
parent: 93f768b5c9a0ab54c462901e4edddacc65cb8ecf (diff)
Allow SA rule extraction to write to a python buffer
+ very small sa-extract cleanup
Diffstat (limited to 'python/cdec')
-rw-r--r-- python/cdec/scfg/extractor.py | 12
1 file changed, 6 insertions, 6 deletions
diff --git a/python/cdec/scfg/extractor.py b/python/cdec/scfg/extractor.py
index 9f1e1137..0a45ddb8 100644
--- a/python/cdec/scfg/extractor.py
+++ b/python/cdec/scfg/extractor.py
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
import StringIO
+from itertools import chain
import clex
import rulefactory
@@ -9,12 +9,12 @@ import cdat
import sym
import log
-log.level = -1
-
from features import EgivenFCoherent, SampleCountF, CountEF,\
MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE
from features import contextless
+log.level = -1
+
class Output(StringIO.StringIO):
def close(self):
pass
@@ -22,8 +22,6 @@ class Output(StringIO.StringIO):
def __str__(self):
return self.getvalue()
-from itertools import chain
-
def get_cn(sentence):
sentence = chain(('<s>',), sentence.split(), ('</s>',))
sentence = (sym.fromstring(word, terminal=True) for word in sentence)
@@ -93,9 +91,11 @@ class GrammarExtractor:
self.models = tuple(contextless(feature) for feature in self.models)
def grammar(self, sentence):
+ if isinstance(sentence, unicode):
+ sentence = sentence.encode('utf8')
out = Output()
cn = get_cn(sentence)
- self.factory.input_file(cn, out)
+ self.factory.input(cn, output=out)
return str(out)
def main(config):