diff options
author | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-09-05 14:55:11 +0100 |
---|---|---|
committer | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-09-05 14:55:11 +0100 |
commit | 6fb3cc36cc4113c9f3510d87b3ae3b9c9351bf4e (patch) | |
tree | ae29f2c831037665ec39e24df0cdf2657dfadc5e /python/pkg/cdec/sa/extractor.py | |
parent | 1fd5b40da3bc9c55fd2fba03bb7fdb43eabee63c (diff) |
Expose new feature extraction API
Diffstat (limited to 'python/pkg/cdec/sa/extractor.py')
-rw-r--r-- | python/pkg/cdec/sa/extractor.py | 5 |
1 files changed, 3 insertions, 2 deletions
```diff
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index 90cc4c51..89e35bf8 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -9,7 +9,7 @@ import cdec.sa
 MAX_INITIAL_SIZE = 15
 
 class GrammarExtractor:
-    def __init__(self, config):
+    def __init__(self, config, features=None):
         if isinstance(config, str) or isinstance(config, unicode):
             if not os.path.exists(config):
                 raise IOError('cannot read configuration from {0}'.format(config))
@@ -58,7 +58,8 @@ class GrammarExtractor:
         tt = cdec.sa.BiLex(from_binary=config['lex_file'])
 
         scorer = cdec.sa.Scorer(EgivenFCoherent, SampleCountF, CountEF,
-            MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE)
+            MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE,
+            *cdec.sa._SA_FEATURES)
 
         fsarray = cdec.sa.SuffixArray(from_binary=config['f_sa_file'])
         edarray = cdec.sa.DataArray(from_binary=config['e_file'])
```