author    trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-06-29 17:32:04 +0000
committer trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-06-29 17:32:04 +0000
commit    e4da0d2b736169935452018bc77b5590b64a46e5 (patch)
tree      b9e7ff76e45aa93d50d0f64d2acedbab33f2a436
parent    1a1f3094010bd85acbfefe64743ebde04f0b41a4 (diff)
Fixed context-based constraints
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@57 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r--  gi/posterior-regularisation/train_pr_global.py | 117
1 file changed, 66 insertions(+), 51 deletions(-)
diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py
index 467069ef..da32fa18 100644
--- a/gi/posterior-regularisation/train_pr_global.py
+++ b/gi/posterior-regularisation/train_pr_global.py
@@ -40,6 +40,8 @@ print 'Read in', num_edges, 'edges and', len(types), 'word types'
num_tags = 5
num_types = len(types)
+num_phrases = len(edges_phrase_to_context)
+num_contexts = len(edges_context_to_phrase)
delta = int(sys.argv[1])
gamma = int(sys.argv[2])
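
The two new counts size the constraint machinery introduced further down: one inequality row per (phrase, tag) pair and per (context, tag) pair. A minimal sketch of the quantities involved, using hypothetical toy data in the same shape the script builds while reading its input:

    # Hypothetical toy input: each phrase maps to its contexts (with counts),
    # and the reverse index maps each context back to its phrases.
    edges_phrase_to_context = {'p1': {'c1': 2, 'c2': 1}, 'p2': {'c1': 3}}
    edges_context_to_phrase = {'c1': {'p1': 2, 'p2': 3}, 'c2': {'p1': 1}}

    num_edges = sum(len(ccs) for ccs in edges_phrase_to_context.values())  # 3
    num_phrases = len(edges_phrase_to_context)                             # 2
    num_contexts = len(edges_context_to_phrase)                            # 2
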
@@ -51,7 +53,8 @@ tagDist = normalise(random(num_tags)+1)
# Pr(context at pos i = w | tag) indexed by i, tag, word
contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
# PR Lagrange multipliers
-lamba = zeros(num_edges * num_tags)
+lamba = zeros(2 * num_edges * num_tags)
+omega_offset = num_edges * num_tags
lamba_index = {}
next = 0
for phrase, ccs in edges_phrase_to_context.items():
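
The multiplier vector is doubled: entries [0, num_edges * num_tags) hold the lambda multipliers for the phrase-side constraints, and the block starting at omega_offset holds the omega multipliers for the context-side constraints, in the same edge order. A sketch of the indexing, continuing the toy data above; the body of the lamba_index loop is inferred from how the index is used later in the patch:

    import numpy as np

    num_tags = 5
    lamba = np.zeros(2 * num_edges * num_tags)
    omega_offset = num_edges * num_tags

    # lamba_index maps each (phrase, context) edge to the base offset of its
    # num_tags lambda entries; the paired omega entries sit omega_offset later.
    lamba_index, next_free = {}, 0
    for phrase, ccs in edges_phrase_to_context.items():
        for context in ccs:
            lamba_index[phrase, context] = next_free
            next_free += num_tags

    li = lamba_index['p1', 'c2'] + 3     # lambda for edge (p1, c2), tag 3
    oi = omega_offset + li               # the matching omega entry
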
@@ -88,7 +91,8 @@ for iteration in range(20):
local_z = 0
for t in range(num_tags):
- local_z += conditionals[t] * exp(-ls[lamba_index[phrase,context] + t])
+ li = lamba_index[phrase,context] + t
+ local_z += conditionals[t] * exp(-ls[li] - ls[omega_offset+li])
logz += log(local_z) * count
#print 'ls', ls
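
Inside the dual, each tag's model posterior is now discounted by both multipliers for its edge, so the projected distribution is q(t) proportional to p(t) * exp(-lambda_t - omega_t). A hedged sketch of the per-edge projection, with conditionals standing in for the model posteriors p(t | phrase, context):

    import numpy as np

    def project_edge(conditionals, ls, base, omega_offset):
        # q(t) is proportional to p(t) * exp(-lambda_t - omega_t) for one edge;
        # base is lamba_index[phrase, context].
        num_tags = len(conditionals)
        scores = np.array([conditionals[t] * np.exp(-ls[base + t] - ls[omega_offset + base + t])
                           for t in range(num_tags)])
        local_z = scores.sum()
        return scores / local_z, local_z
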
@@ -96,12 +100,8 @@ for iteration in range(20):
#print 'dual', logz
return logz
- def primal(ls):
- # FIXME: returns negative values for KL (impossible)
- logz = dual(ls)
- kl = -logz
-
- expectations = zeros(lamba.shape)
+ def loglikelihood():
+ llh = 0
for phrase, ccs in edges_phrase_to_context.items():
for context, count in ccs.items():
conditionals = zeros(num_tags)
@@ -111,17 +111,15 @@ for iteration in range(20):
prob *= contextWordDist[i][t][types[context[i]]]
conditionals[t] = prob
cz = sum(conditionals)
- conditionals /= cz
+ llh += log(cz) * count
+ return llh
- scores = zeros(num_tags)
- for t in range(num_tags):
- scores[t] = conditionals[t] * exp(-ls[lamba_index[phrase,context] + t])
- local_z = sum(scores)
-
- for t in range(num_tags):
- li = lamba_index[phrase,context] + t
- expectations[li] = scores[t] / local_z * count
- kl -= expectations[li] * ls[li]
+ def primal(ls):
+ # FIXME: returns negative values for KL (impossible)
+ logz = dual(ls)
+ expectations = -dual_deriv(ls)
+ kl = -logz - dot(ls, expectations)
+ llh = loglikelihood()
pt_l1linf = 0
for phrase, ccs in edges_phrase_to_context.items():
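
The rewritten primal no longer recomputes the projected posteriors by hand: dual_deriv already returns the negated expected feature counts, so KL(q || p) falls out of the identity KL = -log Z - sum_i ls_i * E_q[phi_i]. Restated as a sketch whose names mirror the script's functions:

    import numpy as np

    def primal_terms(ls, dual, dual_deriv, loglikelihood):
        logz = dual(ls)                         # log partition of the projection
        expectations = -dual_deriv(ls)          # E_q[phi_i], one per multiplier
        kl = -logz - np.dot(ls, expectations)   # KL(q || p)
        return loglikelihood(), kl
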
@@ -143,11 +141,11 @@ for iteration in range(20):
if s > best: best = s
ct_l1linf += best
- return kl, pt_l1linf, ct_l1linf, kl + delta * pt_l1linf + gamma * ct_l1linf
+ return llh, kl, pt_l1linf, ct_l1linf, llh + kl + delta * pt_l1linf + gamma * ct_l1linf
def dual_deriv(ls):
# d/dl log(z) = E_q[phi]
- deriv = zeros(num_edges * num_tags)
+ deriv = zeros(2 * num_edges * num_tags)
for phrase, ccs in edges_phrase_to_context.items():
for context, count in ccs.items():
conditionals = zeros(num_tags)
@@ -161,58 +159,74 @@ for iteration in range(20):
scores = zeros(num_tags)
for t in range(num_tags):
- scores[t] = conditionals[t] * exp(-ls[lamba_index[phrase,context] + t])
+ li = lamba_index[phrase,context] + t
+ scores[t] = conditionals[t] * exp(-ls[li] - ls[omega_offset + li])
local_z = sum(scores)
for t in range(num_tags):
- deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z
+ if delta > 0:
+ deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z
+ if gamma > 0:
+ deriv[omega_offset + lamba_index[phrase,context] + t] -= count * scores[t] / local_z
#print 'ddual', deriv
return deriv
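
dual_deriv fills both halves of the gradient with the same quantity, -count * q(t), since lambda and omega enter the exponent symmetrically; the delta > 0 / gamma > 0 guards leave a disabled block at zero so its multipliers never move off their lower bound. A sketch of the per-edge accumulation:

    def accumulate_deriv(deriv, scores, local_z, count, base, omega_offset,
                         delta, gamma):
        # d/d(lambda) log Z and d/d(omega) log Z are both -E_q[count * feature]
        for t in range(len(scores)):
            g = -count * scores[t] / local_z    # -count * q(t)
            if delta > 0:
                deriv[base + t] += g                    # lambda block
            if gamma > 0:
                deriv[omega_offset + base + t] += g     # omega block
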
def constraints(ls):
- cons = []
- if delta > 0:
- for phrase, ccs in edges_phrase_to_context.items():
- for t in range(num_tags):
+ cons = zeros(num_phrases * num_tags + num_edges * num_tags)
+
+ index = 0
+ for phrase, ccs in edges_phrase_to_context.items():
+ for t in range(num_tags):
+ if delta > 0:
total = delta
for cprime in ccs.keys():
total -= ls[lamba_index[phrase, cprime] + t]
- cons.append(total)
+ cons[index] = total
+ index += 1
- if gamma > 0:
- for context, pcs in edges_context_to_phrase.items():
- for t in range(num_tags):
+ for context, pcs in edges_context_to_phrase.items():
+ for t in range(num_tags):
+ if gamma > 0:
total = gamma
for pprime in pcs.keys():
- total -= ls[lamba_index[pprime, context] + t]
- cons.append(total)
+ total -= ls[omega_offset + lamba_index[pprime, context] + t]
+ cons[index] = total
+ index += 1
+
#print 'cons', cons
return cons
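
Each phrase-side row encodes the budget delta - sum_c lambda[(phrase, c), t] >= 0, i.e. the total lambda mass spent on tag t across a phrase's contexts may not exceed delta; the context-side rows do the same with gamma over the omega block. Note that the vector is allocated with num_edges * num_tags slots for the context family, an overestimate of num_contexts * num_tags; the unused trailing entries stay zero and are trivially satisfied as >= 0 constraints. One row of each family, as a sketch:

    def phrase_row(ls, lamba_index, phrase, contexts, t, delta):
        # delta minus the lambda multipliers for tag t, summed over this
        # phrase's contexts; feasible iff the result is non-negative
        return delta - sum(ls[lamba_index[phrase, c] + t] for c in contexts)

    def context_row(ls, lamba_index, omega_offset, context, phrases, t, gamma):
        # gamma minus the omega multipliers for tag t, summed over this
        # context's phrases
        return gamma - sum(ls[omega_offset + lamba_index[p, context] + t]
                           for p in phrases)
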
def constraints_deriv(ls):
- cons = []
- if delta > 0:
- for phrase, ccs in edges_phrase_to_context.items():
- for t in range(num_tags):
- d = zeros(num_edges * num_tags)
+ cons = zeros((num_phrases * num_tags + num_edges * num_tags, 2 * num_edges * num_tags))
+
+ index = 0
+ for phrase, ccs in edges_phrase_to_context.items():
+ for t in range(num_tags):
+ if delta > 0:
+ d = cons[index,:]#zeros(num_edges * num_tags)
for cprime in ccs.keys():
d[lamba_index[phrase, cprime] + t] = -1
- cons.append(d)
+ #cons[index] = d
+ index += 1
- if gamma > 0:
- for context, pcs in edges_context_to_phrase.items():
- for t in range(num_tags):
- d = zeros(num_edges * num_tags)
+ for context, pcs in edges_context_to_phrase.items():
+ for t in range(num_tags):
+ if gamma > 0:
+ d = cons[index,:]#d = zeros(num_edges * num_tags)
for pprime in pcs.keys():
- d[lamba_index[pprime, context] + t] = -1
- cons.append(d)
+ d[omega_offset + lamba_index[pprime, context] + t] = -1
+ #cons[index] = d
+ index += 1
#print 'dcons', cons
return cons
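
The Jacobian is now preallocated as a dense matrix, and d = cons[index,:] is a NumPy view rather than a copy, so the -1 entries are written into the matrix directly; the commented-out cons[index] = d assignments are therefore redundant. A minimal illustration of the aliasing at work:

    import numpy as np

    J = np.zeros((2, 6))
    row = J[0, :]        # basic slicing returns a view, not a copy
    row[3] = -1          # ...so this writes through to J
    assert J[0, 3] == -1
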
print 'Pre lambda optimisation dual', dual(lamba), 'primal', primal(lamba)
+ #print 'lambda', lamba, lamba.shape
+ #print 'bounds', [(0, max(delta, gamma))] * (2 * num_edges * num_tags)
+
lamba = scipy.optimize.fmin_slsqp(dual, lamba,
- bounds=[(0, delta)] * (num_edges * num_tags),
+ bounds=[(0, max(delta, gamma))] * (2 * num_edges * num_tags),
f_ieqcons=constraints,
fprime=dual_deriv,
fprime_ieqcons=constraints_deriv,
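
The box bounds widen from (0, delta) per entry to a single (0, max(delta, gamma)) across the doubled vector; the tighter per-family budgets are still enforced by the inequality constraints above, so the looser box is safe. A hypothetical per-block alternative (not what this commit does) would cap each block with its own budget:

    # Hypothetical alternative: bound the lambda block by delta and the
    # omega block by gamma, instead of max(delta, gamma) everywhere.
    bounds = ([(0, delta)] * (num_edges * num_tags) +
              [(0, gamma)] * (num_edges * num_tags))
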
@@ -236,7 +250,7 @@ for iteration in range(20):
scores = zeros(num_tags)
li = lamba_index[phrase, context]
for t in range(num_tags):
- scores[t] = conditionals[t] * exp(-lamba[li + t])
+ scores[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t])
z += count * sum(scores)
tagCounts += count * scores
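
The expected-count collection uses the same doubly-discounted projection, so the M-step re-estimates the model from q rather than p. A sketch of one edge's contribution, reusing project_edge from the earlier sketch; note the hunk accumulates the unnormalised scores, with z acting as the global normaliser:

    def accumulate_counts(tagCounts, conditionals, lamba, base, omega_offset, count):
        # one edge's contribution: count * (unnormalised projected scores);
        # returns this edge's addition to the global normaliser z
        q, local_z = project_edge(conditionals, lamba, base, omega_offset)
        tagCounts += count * q * local_z    # count * unnormalised scores
        return count * local_z
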
@@ -264,9 +278,10 @@ for phrase, ccs in edges_phrase_to_context.items():
cz = sum(conditionals)
conditionals /= cz
- scores = zeros(num_tags)
- li = lamba_index[phrase, context]
- for t in range(num_tags):
- scores[t] = conditionals[t] * exp(-lamba[li + t])
+ #scores = zeros(num_tags)
+ #li = lamba_index[phrase, context]
+ #for t in range(num_tags):
+ # scores[t] = conditionals[t] * exp(-lamba[li + t])
- print '%s\t%s ||| C=%d ||| %d |||' % (phrase, context, count, argmax(scores)), scores / sum(scores)
+ #print '%s\t%s ||| C=%d ||| %d |||' % (phrase, context, count, argmax(scores)), scores / sum(scores)
+ print '%s\t%s ||| C=%d ||| %d |||' % (phrase, context, count, argmax(conditionals)), conditionals
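
With the multipliers folded into training, the final dump reverts to the raw model posteriors: the projected-score block is commented out and each output line reports argmax(conditionals) together with the conditional distribution itself.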