summaryrefslogtreecommitdiff
path: root/python/pkg/cdec/sa/extractor.py
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-09-20 21:51:31 -0400
committerChris Dyer <cdyer@cs.cmu.edu>2012-09-20 21:51:31 -0400
commitd2bc8694e5450a46c6f851d926c1ebfeb3424cbf (patch)
tree5619896999d43ca478acee0da2b1c60244aab5b1 /python/pkg/cdec/sa/extractor.py
parent78518f1f417616633b300a361cd5e0c1bcb1ff24 (diff)
parent5d159b948ad71850bcb03d0882ea7183a3a59b7e (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'python/pkg/cdec/sa/extractor.py')
-rw-r--r--python/pkg/cdec/sa/extractor.py13
1 files changed, 9 insertions, 4 deletions
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index 89e35bf8..a5ce8a68 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -57,6 +57,8 @@ class GrammarExtractor:
# lexical weighting tables
tt = cdec.sa.BiLex(from_binary=config['lex_file'])
+ # TODO: use @cdec.sa.features decorator for standard features too
+ # + add a mask to disable features
scorer = cdec.sa.Scorer(EgivenFCoherent, SampleCountF, CountEF,
MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE,
*cdec.sa._SA_FEATURES)
@@ -69,11 +71,14 @@ class GrammarExtractor:
sampler = cdec.sa.Sampler(300, fsarray)
self.factory.configure(fsarray, edarray, sampler, scorer)
+ # Initialize feature definitions with configuration
+ for fn in cdec.sa._SA_CONFIGURE:
+ fn(config)
def grammar(self, sentence):
if isinstance(sentence, unicode):
sentence = sentence.encode('utf8')
- cnet = chain(('<s>',), sentence.split(), ('</s>',))
- cnet = (cdec.sa.sym_fromstring(word, terminal=True) for word in cnet)
- cnet = tuple(((word, None, 1), ) for word in cnet)
- return self.factory.input(cnet)
+ words = tuple(chain(('<s>',), sentence.split(), ('</s>',)))
+ meta = cdec.sa.annotate(words)
+ cnet = cdec.sa.make_lattice(words)
+ return self.factory.input(cnet, meta)