From ab3534c45f463e541f3baf05006a50b64e3bbe31 Mon Sep 17 00:00:00 2001
From: "trevor.cohn"
Date: Mon, 28 Jun 2010 19:34:58 +0000
Subject: First bits of code for PR training

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@44 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 gi/posterior-regularisation/README | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 gi/posterior-regularisation/README

(limited to 'gi/posterior-regularisation/README')

diff --git a/gi/posterior-regularisation/README b/gi/posterior-regularisation/README
new file mode 100644
index 00000000..a3d54ffc
--- /dev/null
+++ b/gi/posterior-regularisation/README
@@ -0,0 +1,3 @@
+ 557 ./cdec_extools/extractor -i btec/split.zh-en.al -c 500000 -L 12 -C | sort -t $'\t' -k 1 | ./cdec_extools/mr_stripe_rule_reduce > btec.concordance
+ 559 wc -l btec.concordance
+ 588 cat btec.concordance | sed 's/.* //' | awk '{ for (i=1; i < NF; i++) { x=substr($i, 1, 2); if (x == "C=") printf "\n"; else if (x != "||") printf "%s ", $i; }; printf "\n"; }' | sort | uniq | wc -l
-- 
cgit v1.2.3