From abba3dc169341e2179c57c76d0f07c8c125b770c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 01:16:32 -0400 Subject: note to self --- python/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/README.md b/python/README.md index da9f9387..bea6190a 100644 --- a/python/README.md +++ b/python/README.md @@ -12,6 +12,10 @@ Compile a parallel corpus and a word alignment into a suffix array representatio python -m cdec.sa.compile -f f.txt -e e.txt -a a.txt -o output/ -c extract.ini +Or, if your parallel corpus is in a single-file format (with source and target sentences on a single line, separated by a triple pipe `|||`), use: + + python -m cdec.sa.compile -b f-e.txt -a a.txt -o output/ -c extract.ini + Extract grammar rules from the compiled corpus: cat input.txt | python -m cdec.sa.extract -c extract.ini -g grammars/ -- cgit v1.2.3