diff options
author | Michael Denkowski <mdenkows@cs.cmu.edu> | 2014-03-07 01:52:09 -0800 |
---|---|---|
committer | Michael Denkowski <mdenkows@cs.cmu.edu> | 2014-03-07 01:52:09 -0800 |
commit | abcd6865a25944a1cd07c9224db2fd7a729f02e6 (patch) | |
tree | 5fd9e5298bd55f91a497c5bd46c35a3414aa5c5c /python/cdec/sa/extractor.py | |
parent | a49f3a5b19547e7e46a652b22fab601da8fc210f (diff) |
More online bilex updates
Diffstat (limited to 'python/cdec/sa/extractor.py')
-rw-r--r-- | python/cdec/sa/extractor.py | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/python/cdec/sa/extractor.py b/python/cdec/sa/extractor.py
index 25313cc3..c2ded1d6 100644
--- a/python/cdec/sa/extractor.py
+++ b/python/cdec/sa/extractor.py
@@ -4,7 +4,7 @@ import cdec.configobj
 from cdec.sa._sa import gzip_or_text
 from cdec.sa.features import EgivenFCoherent, SampleCountF, CountEF,\
         MaxLexEgivenF, MaxLexFgivenE, IsSingletonF, IsSingletonFE,\
-        IsSupportedOnline, CountExceptLM
+        IsSupportedOnline
 import cdec.sa
 
 # maximum span of a grammar rule in TEST DATA
@@ -57,16 +57,16 @@ class GrammarExtractor:
             )
 
         # lexical weighting tables
-        tt = cdec.sa.BiLex(from_binary=config['lex_file'])
+        if not online:
+            tt = cdec.sa.BiLex(from_binary=config['lex_file'])
+        else:
+            tt = cdec.sa.online.Bilex(config['bilex_file'])
 
         # TODO: clean this up
         # Load data and add features for online grammar extraction
         extended_features = []
         if online:
             extended_features.append(IsSupportedOnline)
-            vocab_file = config['vocab_file']
-            vcb_set = set(line.strip() for line in gzip_or_text(vocab))
-            extended_features.append(CountExceptLM(vcb_set))
 
         # TODO: use @cdec.sa.features decorator for standard features too
         # + add a mask to disable features