From 85fb3e7fa679101b30c6d1d5e3347b019a6c73d2 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Thu, 13 Dec 2012 00:19:45 -0500 Subject: Enable loose phrase extraction parameter (default is still tight) use --loose when compiling corpus or tight_phrases = False in config --- python/pkg/cdec/sa/extractor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'python/pkg/cdec/sa/extractor.py') diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py index a5ce8a68..e09f79ea 100644 --- a/python/pkg/cdec/sa/extractor.py +++ b/python/pkg/cdec/sa/extractor.py @@ -10,7 +10,7 @@ MAX_INITIAL_SIZE = 15 class GrammarExtractor: def __init__(self, config, features=None): - if isinstance(config, str) or isinstance(config, unicode): + if isinstance(config, basestring): if not os.path.exists(config): raise IOError('cannot read configuration from {0}'.format(config)) config = cdec.configobj.ConfigObj(config, unrepr=True) @@ -50,8 +50,8 @@ class GrammarExtractor: train_max_initial_size=config['max_size'], # minimum span of an RHS nonterminal in a rule extracted from TRAINING DATA train_min_gap_size=config['min_gap'], - # True if phrases should be tight, False otherwise (better but slower) - tight_phrases=True, + # False if phrases should be loose (better but slower), True otherwise + tight_phrases=config.get('tight_phrases', True), ) # lexical weighting tables -- cgit v1.2.3