diff options
author | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-12-13 00:19:45 -0500 |
---|---|---|
committer | Victor Chahuneau <vchahune@cs.cmu.edu> | 2012-12-13 00:19:45 -0500 |
commit | 17bef03f04cf4decc41d477ec015886663120477 (patch) | |
tree | 133608067d12053183c7b277d0e5dc1b433eef2b /python/pkg/cdec/sa/compile.py | |
parent | 63221793f35c393eb1f5fd8d7f95cc335ab8e9e6 (diff) |
Enable loose phrase extraction parameter
(default is still tight):
use --loose when compiling the corpus,
or set tight_phrases = False in the config.
Diffstat (limited to 'python/pkg/cdec/sa/compile.py')
-rw-r--r-- | python/pkg/cdec/sa/compile.py | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py index 393c72a4..c0402761 100644 --- a/python/pkg/cdec/sa/compile.py +++ b/python/pkg/cdec/sa/compile.py @@ -35,6 +35,8 @@ def main(): help='Number of pre-computed frequent patterns') parser.add_argument('--rank2', '-r2', type=int, default=10, help='Number of pre-computed super-frequent patterns)') + parser.add_argument('--loose', action='store_true', + help='Enable loose phrase extraction (default: tight)') parser.add_argument('-c', '--config', default='/dev/stdout', help='Output configuration') parser.add_argument('-f', '--source', @@ -53,8 +55,10 @@ def main(): parser.error('a parallel corpus is required\n' '\tuse -f (source) with -e (target) or -b (bitext)') - param_names = ("max_len", "max_nt", "max_size", "min_gap", "rank1", "rank2") - params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, args.rank1, args.rank2) + param_names = ('max_len', 'max_nt', 'max_size', 'min_gap', + 'rank1', 'rank2', 'tight_phrases') + params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, + args.rank1, args.rank2, not args.loose) if not os.path.exists(args.output): os.mkdir(args.output) |