summaryrefslogtreecommitdiff
path: root/python/pkg/cdec/sa/extractor.py
diff options
context:
space:
mode:
authorVictor Chahuneau <vchahune@cs.cmu.edu>2012-09-05 14:55:11 +0100
committerVictor Chahuneau <vchahune@cs.cmu.edu>2012-09-05 14:55:11 +0100
commit6fb3cc36cc4113c9f3510d87b3ae3b9c9351bf4e (patch)
treeae29f2c831037665ec39e24df0cdf2657dfadc5e /python/pkg/cdec/sa/extractor.py
parent1fd5b40da3bc9c55fd2fba03bb7fdb43eabee63c (diff)
Expose new feature extraction API
Diffstat (limited to 'python/pkg/cdec/sa/extractor.py')
-rw-r--r--python/pkg/cdec/sa/extractor.py5
1 files changed, 3 insertions, 2 deletions
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index 90cc4c51..89e35bf8 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -9,7 +9,7 @@ import cdec.sa
MAX_INITIAL_SIZE = 15
class GrammarExtractor:
- def __init__(self, config):
+ def __init__(self, config, features=None):
if isinstance(config, str) or isinstance(config, unicode):
if not os.path.exists(config):
raise IOError('cannot read configuration from {0}'.format(config))
@@ -58,7 +58,8 @@ class GrammarExtractor:
tt = cdec.sa.BiLex(from_binary=config['lex_file'])
scorer = cdec.sa.Scorer(EgivenFCoherent, SampleCountF, CountEF,
- MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE)
+ MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE,
+ *cdec.sa._SA_FEATURES)
fsarray = cdec.sa.SuffixArray(from_binary=config['f_sa_file'])
edarray = cdec.sa.DataArray(from_binary=config['e_file'])