diff options
| author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-09-20 21:51:31 -0400 | 
|---|---|---|
| committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-09-20 21:51:31 -0400 | 
| commit | 214f4714d95cb27d31ff976a11dec8a0c0eb438d (patch) | |
| tree | 0970ab16db5260f128a65d60f1dc60caf831efc5 /python/pkg/cdec/sa/extractor.py | |
| parent | 17d085055e24bf189a3b378af77e1071922893cc (diff) | |
| parent | e26edac51cc47b2b2322fbb870308daa708cec8c (diff) | |
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'python/pkg/cdec/sa/extractor.py')
| -rw-r--r-- | python/pkg/cdec/sa/extractor.py | 13 | 
1 file changed, 9 insertions, 4 deletions
```diff
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index 89e35bf8..a5ce8a68 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -57,6 +57,8 @@ class GrammarExtractor:
         # lexical weighting tables
         tt = cdec.sa.BiLex(from_binary=config['lex_file'])
 
+        # TODO: use @cdec.sa.features decorator for standard features too
+        # + add a mask to disable features
         scorer = cdec.sa.Scorer(EgivenFCoherent, SampleCountF, CountEF,
             MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE,
             *cdec.sa._SA_FEATURES)
@@ -69,11 +71,14 @@ class GrammarExtractor:
         sampler = cdec.sa.Sampler(300, fsarray)
 
         self.factory.configure(fsarray, edarray, sampler, scorer)
+        # Initialize feature definitions with configuration
+        for fn in cdec.sa._SA_CONFIGURE:
+            fn(config)
 
     def grammar(self, sentence):
         if isinstance(sentence, unicode):
             sentence = sentence.encode('utf8')
-        cnet = chain(('<s>',), sentence.split(), ('</s>',))
-        cnet = (cdec.sa.sym_fromstring(word, terminal=True) for word in cnet)
-        cnet = tuple(((word, None, 1), ) for word in cnet)
-        return self.factory.input(cnet)
+        words = tuple(chain(('<s>',), sentence.split(), ('</s>',)))
+        meta = cdec.sa.annotate(words)
+        cnet = cdec.sa.make_lattice(words)
+        return self.factory.input(cnet, meta)
```
