diff options
Diffstat (limited to 'python/cdec/sa/extractor.py')
-rw-r--r-- | python/cdec/sa/extractor.py | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/python/cdec/sa/extractor.py b/python/cdec/sa/extractor.py
index 25313cc3..c2ded1d6 100644
--- a/python/cdec/sa/extractor.py
+++ b/python/cdec/sa/extractor.py
@@ -4,7 +4,7 @@ import cdec.configobj
 from cdec.sa._sa import gzip_or_text
 from cdec.sa.features import EgivenFCoherent, SampleCountF, CountEF,\
         MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE,\
-        IsSupportedOnline, CountExceptLM
+        IsSupportedOnline
 import cdec.sa
 
 # maximum span of a grammar rule in TEST DATA
@@ -57,16 +57,16 @@ class GrammarExtractor:
                 )
 
         # lexical weighting tables
-        tt = cdec.sa.BiLex(from_binary=config['lex_file'])
+        if not online:
+            tt = cdec.sa.BiLex(from_binary=config['lex_file'])
+        else:
+            tt = cdec.sa.online.Bilex(config['bilex_file'])
 
         # TODO: clean this up
         # Load data and add features for online grammar extraction
         extended_features = []
         if online:
             extended_features.append(IsSupportedOnline)
-            vocab_file = config['vocab_file']
-            vcb_set = set(line.strip() for line in gzip_or_text(vocab))
-            extended_features.append(CountExceptLM(vcb_set))
 
         # TODO: use @cdec.sa.features decorator for standard features too
         # + add a mask to disable features