author     trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-07-01 01:30:16 +0000
committer  trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-07-01 01:30:16 +0000
commit     6ea9bd3aa600bce5224fa97dae79bee6f40699a2 (patch)
tree       c4749b21990697bdd0533dab01c18611e6cc5a90
parent     eb9fc0e76ff9708783bd9483a68e28d70fb2bcd2 (diff)
Verified that the phrase-tag L1Lmax constraints can be optimised independently for each phrase.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@86 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r--  gi/posterior-regularisation/train_pr_parallel.py | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
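For context on the commit message: the phrase-tag L1Lmax penalty sums, over phrases and tags, the largest posterior mass that any edge of the phrase puts on that tag. Because the outer sum ranges over phrases, the penalty (and with it the constrained dual) splits into one independent term per phrase, which is what licenses the per-phrase 'local' optimisation toggled in the diff below. A minimal sketch of that decomposition (hypothetical helper, not code from this repository):

import numpy as np

def l1lmax(q_by_phrase):
    # q_by_phrase[p] is an (edges x tags) posterior matrix for phrase p.
    # The penalty is sum_p sum_t max_j q[j,t]; the outer sum is over
    # phrases, so each phrase contributes an independent term.
    return sum(q.max(axis=0).sum() for q in q_by_phrase)

# The per-phrase terms add up to the corpus-wide penalty, so the dual
# can be optimised phrase by phrase ('local') or jointly ('global').
qs = [np.random.dirichlet(np.ones(5), size=3) for _ in range(4)]
assert np.isclose(sum(q.max(axis=0).sum() for q in qs), l1lmax(qs))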
diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py
index 72c5cf25..d5df87b5 100644
--- a/gi/posterior-regularisation/train_pr_parallel.py
+++ b/gi/posterior-regularisation/train_pr_parallel.py
@@ -1,7 +1,7 @@
 import sys
 import scipy.optimize
 from numpy import *
-from numpy.random import random
+from numpy.random import random, seed
 
 #
 # Step 1: load the concordance counts
@@ -48,6 +48,10 @@ num_types = len(types)
 num_phrases = len(edges_phrase_to_context)
 num_contexts = len(edges_context_to_phrase)
 delta = float(sys.argv[1])
+assert sys.argv[2] in ('local', 'global')
+local = sys.argv[2] == 'local'
+if len(sys.argv) > 3:
+    seed(int(sys.argv[3]))
 
 def normalise(a):
     return a / float(sum(a))
@@ -267,7 +271,7 @@ for iteration in range(20):
 
     # E-step
     llh = kl = l1lmax = 0
-    if False:
+    if local:
         for p in range(num_phrases):
             o = LocalDualObjective(p, delta)
             #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0]
@@ -284,18 +288,22 @@ for iteration in range(20):
                 for i in range(4):
                     for t in range(num_tags):
                         contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
+
+        #print 'iteration', iteration, 'LOCAL objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
 
     else:
         o = GlobalDualObjective(delta)
        obj = o.optimize()
         llh, kl, l1lmax = o.optimize()
+        index = 0
         for p, (phrase, edges) in enumerate(edges_phrase_to_context):
-            for j, (context, count) in enumerate(edges):
+            for context, count in edges:
                 for t in range(num_tags):
-                    tagCounts[p][t] += count * o.q[j,t]
+                    tagCounts[p][t] += count * o.q[index,t]
                 for i in range(4):
                     for t in range(num_tags):
-                        contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
+                        contextWordCounts[i][t][types[context[i]]] += count * o.q[index,t]
+                index += 1
 
     print 'iteration', iteration, 'objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
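The substantive fix in the last hunk: GlobalDualObjective stacks the posteriors q for every edge of every phrase into a single matrix, so its rows must be addressed with a running counter, not with the per-phrase enumerate index j, which restarts at 0 for each phrase and silently reads the wrong rows. A small illustration of the two indexing schemes (made-up data, not from the repository):

# Two phrases, three edges in total, in edges_phrase_to_context layout.
edges_phrase_to_context = [('p0', [('c0', 1), ('c1', 2)]),
                           ('p1', [('c2', 3)])]

index = 0  # running row index into the stacked global q matrix
for p, (phrase, edges) in enumerate(edges_phrase_to_context):
    for j, (context, count) in enumerate(edges):
        # buggy: row j maps p1/c2 back to row 0, re-reading p0/c0's row
        # fixed: index visits rows 0, 1, 2 across all phrases exactly once
        print('%s/%s -> per-phrase row %d, global row %d' % (phrase, context, j, index))
        index += 1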