From 1cef9b6842fec7598a0a0571f69bf4caab8e4c91 Mon Sep 17 00:00:00 2001
From: Victor Chahuneau
Date: Thu, 6 Sep 2012 17:46:41 +0100
Subject: [cdec.sa] Allow sentence annotation and initial configuration

---
 python/pkg/cdec/sa/__init__.py  | 14 ++++++++++++++
 python/pkg/cdec/sa/extractor.py |  8 ++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'python/pkg/cdec')

diff --git a/python/pkg/cdec/sa/__init__.py b/python/pkg/cdec/sa/__init__.py
index d4b94484..e0a344b7 100644
--- a/python/pkg/cdec/sa/__init__.py
+++ b/python/pkg/cdec/sa/__init__.py
@@ -4,7 +4,21 @@ from cdec.sa._sa import make_lattice, decode_lattice, decode_sentence,\
 from cdec.sa.extractor import GrammarExtractor
 
 _SA_FEATURES = []
+_SA_ANNOTATORS = {}
+_SA_CONFIGURE = []
 
 def feature(fn):
     _SA_FEATURES.append(fn)
     return fn
+
+def annotator(fn):
+    _SA_ANNOTATORS[fn.__name__] = fn
+
+def annotate(sentence):
+    meta = {}
+    for name, fn in _SA_ANNOTATORS.iteritems():
+        meta[name] = fn(sentence)
+    return meta
+
+def configure(fn):
+    _SA_CONFIGURE.append(fn)
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index 94392c30..a5ce8a68 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -71,10 +71,14 @@ class GrammarExtractor:
         sampler = cdec.sa.Sampler(300, fsarray)
 
         self.factory.configure(fsarray, edarray, sampler, scorer)
+        # Initialize feature definitions with configuration
+        for fn in cdec.sa._SA_CONFIGURE:
+            fn(config)
 
     def grammar(self, sentence):
         if isinstance(sentence, unicode):
             sentence = sentence.encode('utf8')
-        words = chain(('',), sentence.split(), ('',))
+        words = tuple(chain(('',), sentence.split(), ('',)))
+        meta = cdec.sa.annotate(words)
         cnet = cdec.sa.make_lattice(words)
-        return self.factory.input(cnet)
+        return self.factory.input(cnet, meta)
-- 
cgit v1.2.3