#!/bin/bash
#
# parse-stanford.sh - run the Stanford LexicalizedParser over a text file
# and store its typed-dependency output, one line per parse, next to the input.
#
# Usage:
#   parse-stanford.sh text-file
#
# Input:
#   text-file   Passed to the parser with "-sentences newline -tokenized
#               -encoding utf-8", i.e. one pre-tokenized UTF-8 sentence
#               per line.
#
# Output:
#   text-file.stp   The parser output after post-processing: every
#                   blank-line-separated group of output lines is joined
#                   into a single space-separated line, trailing spaces
#                   are stripped and ", " is compacted to ",".
#
# Exit status:
#   1 on wrong usage or unreadable input; otherwise non-zero if any stage
#   of the parse pipeline fails (pipefail), 0 on success.
#
# Requirements:
#   - java on PATH, Stanford parser jars under /toolbox/stanfordparser_3_2_0/
#   - a sed that understands \t and \n in patterns/replacements (GNU sed)
#
# Provenance: extracted from git commit
#   4921f3e8938b5c219b0481870e1b33b291659bd5 ("parse-stanford"),
#   Patrick Simianer, Thu, 24 Apr 2014 18:47:24 +0200.

set -euo pipefail

if (( $# != 1 )); then
  # Diagnostics belong on stderr so they never end up in captured output.
  printf '%s text-file\n' "$0" >&2
  exit 1
fi

input=$1
output="${input}.stp"

# Fail early instead of starting a 25 GB JVM on a file that cannot be read.
if [[ ! -f "$input" || ! -r "$input" ]]; then
  printf '%s: cannot read input file: %s\n' "$0" "$input" >&2
  exit 1
fi

# Quoted on purpose: the literal '*' must reach the JVM, which expands
# classpath wildcards itself. The leading ':' (an empty classpath entry) is
# kept exactly as in the original; presumably it adds the current directory
# to the classpath - confirm before removing.
export CLASSPATH=':/toolbox/stanfordparser_3_2_0/*'

# Pipeline stages:
#   java  parse stdin (the trailing "-" argument) with the English PCFG
#         model and print basic typed dependencies.
#   tr    turn every newline into a tab, so the whole output becomes one
#         line in which a blank line shows up as two consecutive tabs.
#   sed   turn each double tab back into a newline (one line per parse).
#         This must stay a separate sed process: the next stage relies on
#         '$' matching at the end of each of these new lines.
#   sed   per line: remaining tabs -> spaces, strip trailing spaces,
#         compact ", " to ",".
java -server -mx25000m \
  edu.stanford.nlp.parser.lexparser.LexicalizedParser \
  -nthreads 8 \
  -sentences newline \
  -encoding utf-8 \
  -tokenized \
  -outputFormat "typedDependencies" \
  -outputFormatOptions "basicDependencies" \
  edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz \
  - <"$input" \
  | tr '\n' '\t' \
  | sed 's/\t\t/\n/g' \
  | sed -e 's/\t/ /g' -e 's/ *$//' -e 's/, /,/g' \
  >"$output"