diff options
author | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
---|---|---|
committer | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
commit | 3d8d656fa7911524e0e6885647173474524e0784 (patch) | |
tree | 81b1ee2fcb67980376d03f0aa48e42e53abff222 /python/pkg/cdec/sa/compile.py | |
parent | be7f57fdd484e063775d7abf083b9fa4c403b610 (diff) | |
parent | 96fedabebafe7a38a6d5928be8fff767e411d705 (diff) |
fixed conflicts
Diffstat (limited to 'python/pkg/cdec/sa/compile.py')
-rw-r--r-- | python/pkg/cdec/sa/compile.py | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py index 393c72a4..ce249c0f 100644 --- a/python/pkg/cdec/sa/compile.py +++ b/python/pkg/cdec/sa/compile.py @@ -4,9 +4,10 @@ import os import logging import cdec.configobj import cdec.sa +import sys MAX_PHRASE_LENGTH = 4 -def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2): +def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2, tight_phrases): lcp = cdec.sa.LCP(f_sa) stats = sorted(lcp.compute_stats(MAX_PHRASE_LENGTH), reverse=True) precomp = cdec.sa.Precomputation(from_stats=stats, @@ -20,6 +21,8 @@ def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2): return precomp def main(): + sys.setrecursionlimit(sys.getrecursionlimit() * 100) + logging.basicConfig(level=logging.INFO) logger = logging.getLogger('cdec.sa.compile') parser = argparse.ArgumentParser(description='Compile a corpus into a suffix array.') @@ -35,6 +38,8 @@ def main(): help='Number of pre-computed frequent patterns') parser.add_argument('--rank2', '-r2', type=int, default=10, help='Number of pre-computed super-frequent patterns)') + parser.add_argument('--loose', action='store_true', + help='Enable loose phrase extraction (default: tight)') parser.add_argument('-c', '--config', default='/dev/stdout', help='Output configuration') parser.add_argument('-f', '--source', @@ -53,8 +58,10 @@ def main(): parser.error('a parallel corpus is required\n' '\tuse -f (source) with -e (target) or -b (bitext)') - param_names = ("max_len", "max_nt", "max_size", "min_gap", "rank1", "rank2") - params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, args.rank1, args.rank2) + param_names = ('max_len', 'max_nt', 'max_size', 'min_gap', + 'rank1', 'rank2', 'tight_phrases') + params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, + args.rank1, args.rank2, not args.loose) if not os.path.exists(args.output): os.mkdir(args.output) |