diff options
author | Patrick Simianer <p@simianer.de> | 2014-04-24 18:47:24 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-04-24 18:47:24 +0200 |
commit | 4921f3e8938b5c219b0481870e1b33b291659bd5 (patch) | |
tree | b056f5da90f81131df71650b90eb789ef42fa591 | |
parent | b08be6207a78fc3639231489b74b4c72d78221d7 (diff) |
parse-stanford
-rwxr-xr-x | parse-stanford.sh | 13 |
1 files changed, 13 insertions, 0 deletions
#!/bin/bash
#
# parse-stanford.sh -- run the Stanford LexicalizedParser over a text file
# and store its typed-dependency output with one sentence per line.
#
# Usage:
#   parse-stanford.sh text-file
#
# Input:  text-file, fed to the parser on stdin. The parser is invoked with
#         "-sentences newline -tokenized", i.e. it is told to expect one
#         already-tokenized sentence per line.
# Output: text-file.stp, written next to the input. The parser's output is
#         flattened so that each blank-line-separated group of dependencies
#         ends up on a single line, with dependencies separated by spaces.
# Exit:   1 on wrong usage or unreadable input; otherwise the status of the
#         pipeline (non-zero if any stage, including the parser, fails).

# Abort on errors and unset variables, and let a failing parser make the
# whole pipeline fail instead of being masked by the trailing sed.
set -euo pipefail

if [[ $# -ne 1 ]]; then
  # Usage goes to stderr so it never pollutes captured output.
  printf '%s text-file\n' "$0" >&2
  exit 1
fi

# The leading ':' and the trailing '/*' are passed to the JVM literally;
# quoting guarantees the shell never tries to expand the wildcard itself.
export CLASSPATH=':/toolbox/stanfordparser_3_2_0/*'

IN=$1

# Fail early: otherwise the output redirection below would still create an
# empty "$IN.stp" and the parser would be started for nothing.
if [[ ! -r "$IN" ]]; then
  printf '%s: cannot read input file: %s\n' "$0" "$IN" >&2
  exit 1
fi

parser_cmd=(
  java -server -mx25000m
  edu.stanford.nlp.parser.lexparser.LexicalizedParser
  -nthreads 8
  -sentences newline
  -encoding utf-8
  -tokenized
  -outputFormat "typedDependencies"
  -outputFormatOptions "basicDependencies"
  edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz
  -   # read the text to parse from stdin
)

# Post-processing, stage by stage. The sed calls are intentionally separate
# processes: the first one re-introduces newlines, and the later ones must
# then see each resulting line on its own (notably the '$' anchor).
"${parser_cmd[@]}" < "$IN" |
  tr '\n' '\t' |        # join the whole output into one tab-separated line
  sed 's/\t\t/\n/g' |   # a blank line (two tabs in a row) ends a sentence
  sed 's/\t/ /g' |      # remaining tabs separate dependencies: use spaces
  sed 's/ *$//' |       # drop trailing spaces on every line
  sed 's/, /,/g' \
  > "${IN}.stp"         # "rel(a, b)" becomes "rel(a,b)"