summaryrefslogtreecommitdiff
path: root/extools/simple-extract.sh
blob: ec5c5276a3900aec2342979755e4e349ab5ef500 (plain)
1
2
3
4
5
6
7
8
9
10
11
#!/bin/bash
#
# Two-stage extraction pipeline.
#
# Usage: simple-extract.sh CORPUS
#   CORPUS  input file handed to ./extractor via -i
#
# Must be run from the directory that holds ./extractor and
# ./mr_stripe_rule_reduce. Writes two files into the current directory:
#   ex.output.gz     sorted, gzipped output of ./extractor
#   phrase-table.gz  gzipped output of the two reducer passes
# A timestamp is printed before, between and after the stages.

# Abort on the first failing command or pipeline stage so that a crashed
# extractor/reducer cannot leave behind an empty-but-valid .gz file while the
# script still exits 0.
set -euo pipefail

if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s CORPUS\n' "${0##*/}" >&2
  exit 2
fi

# LC_ALL takes precedence over LANG; pin both so sort(1) orders bytes the same
# way no matter what locale the caller has exported.
export LANG=C
export LC_ALL=C

date
./extractor -i "$1" -d X -c 500000 -L 12 -b \
  | sort -t $'\t' -k 1 \
  | gzip > ex.output.gz
date
# -p = compute phrase marginals
# -b = bidirectional rules (starting with F or E) were extracted
gzip -dc ex.output.gz \
  | ./mr_stripe_rule_reduce -p -b \
  | sort -t $'\t' -k 1 \
  | ./mr_stripe_rule_reduce \
  | gzip > phrase-table.gz
date