summary | refs | log | tree | commit | diff
path: root/python/pkg/cdec
diff options
context:
space:
mode:
author Victor Chahuneau <vchahune@cs.cmu.edu> 2012-09-05 19:17:29 +0100
committer Victor Chahuneau <vchahune@cs.cmu.edu> 2012-09-05 19:17:29 +0100
commit b939bff222736e87fa234c2835511cc29fce644f (patch)
tree 869aeabb86e5ce4ab4f9bdb7924f3c6233e1a051 /python/pkg/cdec
parent c6b35eff2537f0b07ceb9aca499e8f76b3d33710 (diff)
Revert to the "old style" pair count...
+ API naming fixes + Multiple feature definition files can be passed to the extractor
Diffstat (limited to 'python/pkg/cdec')
-rw-r--r-- python/pkg/cdec/sa/extract.py 23
-rw-r--r-- python/pkg/cdec/sa/extractor.py 2
2 files changed, 15 insertions, 10 deletions
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index 472f128b..3136c5a7 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -11,16 +11,17 @@ extractor, prefix = None, None
def make_extractor(config, grammars, features):
global extractor, prefix
signal.signal(signal.SIGINT, signal.SIG_IGN) # Let parent process catch Ctrl+C
- if features: load_features(features)
+ load_features(features)
extractor = cdec.sa.GrammarExtractor(config)
prefix = grammars
def load_features(features):
- logging.info('Loading additional feature definitions from %s', features)
- prefix = os.path.dirname(features)
- sys.path.append(prefix)
- __import__(os.path.basename(features).replace('.py', ''))
- sys.path.remove(prefix)
+ for featdef in features:
+ logging.info('Loading additional feature definitions from %s', featdef)
+ prefix = os.path.dirname(featdef)
+ sys.path.append(prefix)
+ __import__(os.path.basename(featdef).replace('.py', ''))
+ sys.path.remove(prefix)
def extract(inp):
global extractor, prefix
@@ -44,15 +45,17 @@ def main():
help='number of parallel extractors')
parser.add_argument('-s', '--chunksize', type=int, default=10,
help='number of sentences / chunk')
- parser.add_argument('-f', '--features', type=str, default=None,
+ parser.add_argument('-f', '--features', action='append',
help='additional feature definitions')
args = parser.parse_args()
if not os.path.exists(args.grammars):
os.mkdir(args.grammars)
- if not (args.features is None or args.features.endswith('.py')):
- sys.stderr.write('Error: feature definition file should be a python module\n')
- sys.exit(1)
+ for featdef in args.features:
+ if not featdef.endswith('.py'):
+ sys.stderr.write('Error: feature definition file <{0}>'
+ ' should be a python module\n'.format(featdef))
+ sys.exit(1)
if args.jobs > 1:
logging.info('Starting %d workers; chunk size: %d', args.jobs, args.chunksize)
diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py
index 89e35bf8..940544fb 100644
--- a/python/pkg/cdec/sa/extractor.py
+++ b/python/pkg/cdec/sa/extractor.py
@@ -57,6 +57,8 @@ class GrammarExtractor:
# lexical weighting tables
tt = cdec.sa.BiLex(from_binary=config['lex_file'])
+ # TODO: use @cdec.sa.features decorator for standard features too
+ # + add a mask to disable features
scorer = cdec.sa.Scorer(EgivenFCoherent, SampleCountF, CountEF,
MaxLexFgivenE(tt), MaxLexEgivenF(tt), IsSingletonF, IsSingletonFE,
*cdec.sa._SA_FEATURES)