summaryrefslogtreecommitdiff
path: root/extools/simple-extract.sh
blob: 7d9f439d82bd35656a4d5d96813ace14bfab1059 (plain)
1
2
3
4
5
6
7
8
9
10
11
#!/bin/bash
#
# Usage: simple-extract.sh <input>
#
# Runs ./extractor on <input>, sorts its tab-separated output and stores it
# gzip-compressed in ex.output.gz; then feeds that file through two passes of
# ./mr_stripe_rule_reduce (with a sort in between) and writes the result to
# phrase-table.gz. Both output files are created in the current directory.
# A timestamp is printed before and after each stage.
#
# Exits with status 2 when <input> is missing, and with a non-zero status as
# soon as any stage of either pipeline fails.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails (otherwise a crash in ./extractor would still
# leave a valid-looking but empty/truncated .gz behind).
set -euo pipefail

export LANG=C

if (( $# < 1 )) || [[ -z "$1" ]]; then
  printf 'Usage: %s <input>\n' "${0##*/}" >&2
  exit 2
fi
input=$1

date
# LC_ALL=C on sort: LANG alone is overridden by any LC_ALL/LC_COLLATE present
# in the caller's environment, so force byte-wise collation explicitly.
# NOTE(review): presumably the reducer relies on this ordering -- confirm.
./extractor -i "$input" -d X -c 500000 -L 12 -b \
  | LC_ALL=C sort -t $'\t' -k 1 \
  | gzip > ex.output.gz
date
# -p = compute phrase marginals
# -b = bidirectional rules (starting with F or E) were extracted
# gzip -dc is used instead of gzcat, which is not installed under that name
# on many systems; gzip itself is already required above.
gzip -dc ex.output.gz \
  | ./mr_stripe_rule_reduce -p -b \
  | LC_ALL=C sort -t $'\t' -k 1 \
  | ./mr_stripe_rule_reduce \
  | gzip > phrase-table.gz
date