summary refs log tree commit diff
path: root/gi
diff options
context:
space:
mode:
Diffstat (limited to 'gi')
-rwxr-xr-x gi/posterior-regularisation/prjava/train-PR-cluster.sh 2
-rwxr-xr-x gi/pyp-topics/scripts/spans2labels.py 11
2 files changed, 9 insertions, 4 deletions
diff --git a/gi/posterior-regularisation/prjava/train-PR-cluster.sh b/gi/posterior-regularisation/prjava/train-PR-cluster.sh
index 4d4c68d0..8298aa14 100755
--- a/gi/posterior-regularisation/prjava/train-PR-cluster.sh
+++ b/gi/posterior-regularisation/prjava/train-PR-cluster.sh
@@ -1,4 +1,4 @@
#!/bin/sh
d=`dirname $0`
-java -ea -Xmx8g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/lib/commons-math-2.1.jar phrase.Trainer $*
+java -ea -Xmx60g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/lib/commons-math-2.1.jar phrase.Trainer $*
diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py
index f3968616..73ea20f2 100755
--- a/gi/pyp-topics/scripts/spans2labels.py
+++ b/gi/pyp-topics/scripts/spans2labels.py
@@ -20,6 +20,7 @@ if len(sys.argv) > 4:
assert phr in 'stb'
assert ctx in 'stb'
+print >>sys.stderr, "Loading phrase index"
phrase_context_index = {}
for line in file(sys.argv[1], 'r'):
phrase,tail= line.split('\t')
@@ -37,17 +38,20 @@ for line in file(sys.argv[1], 'r'):
features=dict([ keyval.split('=') for keyval in contexts[i+1].split()])
category = features['C']
if features.has_key('P') and float(features['P']) < threshold:
- category = cutoff_cat
+ category = cutoff_cat
phrase_context_index[(phrase,contexts[i])] = category
-# print (phrase,contexts[i]), category, prob
+ #print (phrase,contexts[i]), category
+print >>sys.stderr, "Labelling spans"
for line in sys.stdin:
line_segments = line.split('|||')
source = ['<s>' for x in range(order)] + line_segments[0].split() + ['</s>' for x in range(order)]
target = ['<s>' for x in range(order)] + line_segments[1].split() + ['</s>' for x in range(order)]
phrases = [ [int(i) for i in x.split('-')] for x in line_segments[2].split()]
+ #print >>sys.stderr, "line", source, '---', target, 'phrases', phrases
+
print "|||",
for s1,s2,t1,t2 in phrases:
@@ -85,7 +89,8 @@ for line in sys.stdin:
else:
context = contextt
- label = phrase_context_index.get((phrase,context), "<UNK>")
+ #print "%d-%d-%d-%d looking up" % (s1-order,s2-order,t1-order,t2-order), (phrase, context)
+ label = phrase_context_index.get((phrase,context), cutoff_cat)
if label != cutoff_cat: #cutoff'd spans are left unlabelled
print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label),
print