summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de>, 2014-04-24 18:47:24 +0200
committer: Patrick Simianer <p@simianer.de>, 2014-04-24 18:47:24 +0200
commit: 4921f3e8938b5c219b0481870e1b33b291659bd5 (patch)
tree: b056f5da90f81131df71650b90eb789ef42fa591
parent: b08be6207a78fc3639231489b74b4c72d78221d7 (diff)
parse-stanford
-rwxr-xr-x parse-stanford.sh 13
1 files changed, 13 insertions, 0 deletions
diff --git a/parse-stanford.sh b/parse-stanford.sh
new file mode 100755
index 0000000..f8d4210
--- /dev/null
+++ b/parse-stanford.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Parse text-file (one tokenized sentence per line) with the Stanford LexicalizedParser; output is written to text-file.stp.
+if [ $# != 1 ]; then
+ echo "$0 text-file" >&2
+ exit 1
+fi
+set -o pipefail # report a failing java/tr/sed stage instead of only the last sed's exit status
+export CLASSPATH=:/toolbox/stanfordparser_3_2_0/*
+# "$IN" is quoted below so input paths containing spaces or glob characters are handled.
+IN=$1
+# Post-processing: newlines -> tabs, double tab (blank line) -> line break, remaining tabs -> spaces, trim trailing spaces, ", " -> ",". The seds stay separate so each one works on the lines produced by the previous one.
+java -server -mx25000m edu.stanford.nlp.parser.lexparser.LexicalizedParser -nthreads 8 -sentences newline -encoding utf-8 -tokenized -outputFormat "typedDependencies" -outputFormatOptions "basicDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz - < "$IN" | tr '\n' '\t' | sed 's/\t\t/\n/g' | sed 's/\t/ /g' | sed 's/ *$//' | sed 's/, /,/g' > "$IN.stp"
+