From d59da3201bd8dce7512c3a376a915720a07cda8c Mon Sep 17 00:00:00 2001 From: "trevor.cohn" Date: Wed, 30 Jun 2010 21:26:24 +0000 Subject: Some tweaks git-svn-id: https://ws10smt.googlecode.com/svn/trunk@82 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/posterior-regularisation/train_pr_global.py | 4 +- gi/posterior-regularisation/train_pr_parallel.py | 320 +++++++++++++++++++++++ 2 files changed, 323 insertions(+), 1 deletion(-) create mode 100644 gi/posterior-regularisation/train_pr_parallel.py diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py index 6ce7290d..8b80c6bc 100644 --- a/gi/posterior-regularisation/train_pr_global.py +++ b/gi/posterior-regularisation/train_pr_global.py @@ -263,7 +263,9 @@ for iteration in range(20): scores[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t]) z += count * sum(scores) - tagCounts[p] += count * scores + for t in range(num_tags): + tagCounts[p][t] += count * scores[t] + for i in range(4): for t in range(num_tags): contextWordCounts[i][t][types[context[i]]] += count * scores[t] diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py new file mode 100644 index 00000000..72c5cf25 --- /dev/null +++ b/gi/posterior-regularisation/train_pr_parallel.py @@ -0,0 +1,320 @@ +import sys +import scipy.optimize +from numpy import * +from numpy.random import random + +# +# Step 1: load the concordance counts +# + +edges_phrase_to_context = [] +edges_context_to_phrase = [] +types = {} +context_types = {} +num_edges = 0 + +for line in sys.stdin: + phrase, rest = line.strip().split('\t') + parts = rest.split('|||') + edges_phrase_to_context.append((phrase, [])) + for i in range(0, len(parts), 2): + context, count = parts[i:i+2] + + ctx = tuple(filter(lambda x: x != '', context.split())) + cnt = int(count.strip()[2:]) + edges_phrase_to_context[-1][1].append((ctx, cnt)) + + cid = context_types.get(ctx, len(context_types)) + if cid == len(context_types): + context_types[ctx] = cid + edges_context_to_phrase.append((ctx, [])) + edges_context_to_phrase[cid][1].append((phrase, cnt)) + + for token in ctx: + types.setdefault(token, len(types)) + for token in phrase.split(): + types.setdefault(token, len(types)) + + num_edges += 1 + +print 'Read in', num_edges, 'edges and', len(types), 'word types' + +# +# Step 2: initialise the model parameters +# + +num_tags = 5 +num_types = len(types) +num_phrases = len(edges_phrase_to_context) +num_contexts = len(edges_context_to_phrase) +delta = float(sys.argv[1]) + +def normalise(a): + return a / float(sum(a)) + +# Pr(tag | phrase) +tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)] +# Pr(context at pos i = w | tag) indexed by i, tag, word +contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)] + +# +# Step 3: expectation maximisation +# + +class GlobalDualObjective: + """ + Objective, log(z), for all phrases s.t. lambda >= 0, sum_c lambda_pct <= scale + """ + + def __init__(self, scale): + self.scale = scale + self.posterior = zeros((num_edges, num_tags)) + self.q = zeros((num_edges, num_tags)) + self.llh = 0 + + index = 0 + for j, (phrase, edges) in enumerate(edges_phrase_to_context): + for context, count in edges: + for t in range(num_tags): + prob = tagDist[j][t] + for k, token in enumerate(context): + prob *= contextWordDist[k][t][types[token]] + self.posterior[index,t] = prob + z = sum(self.posterior[index,:]) + self.posterior[index,:] /= z + self.llh += log(z) + index += 1 + + def objective(self, ls): + ls = ls.reshape((num_edges, num_tags)) + logz = 0 + + index = 0 + for j, (phrase, edges) in enumerate(edges_phrase_to_context): + for context, count in edges: + for t in range(num_tags): + self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t]) + local_z = sum(self.q[index,:]) + self.q[index,:] /= local_z + logz += log(local_z) * count + index += 1 + + return logz + + # FIXME: recomputes q many more times than necessary + + def gradient(self, ls): + ls = ls.reshape((num_edges, num_tags)) + gradient = zeros((num_edges, num_tags)) + + index = 0 + for j, (phrase, edges) in enumerate(edges_phrase_to_context): + for context, count in edges: + for t in range(num_tags): + self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t]) + local_z = sum(self.q[index,:]) + self.q[index,:] /= local_z + for t in range(num_tags): + gradient[index,t] -= self.q[index,t] * count + index += 1 + + return gradient.ravel() + + def constraints(self, ls): + ls = ls.reshape((num_edges, num_tags)) + cons = ones((num_phrases, num_tags)) * self.scale + index = 0 + for j, (phrase, edges) in enumerate(edges_phrase_to_context): + for i, (context, count) in enumerate(edges): + for t in range(num_tags): + cons[j,t] -= ls[index,t] + index += 1 + return cons.ravel() + + def constraints_gradient(self, ls): + ls = ls.reshape((num_edges, num_tags)) + gradient = zeros((num_phrases, num_tags, num_edges, num_tags)) + index = 0 + for j, (phrase, edges) in enumerate(edges_phrase_to_context): + for i, (context, count) in enumerate(edges): + for t in range(num_tags): + gradient[j,t,index,t] -= 1 + index += 1 + return gradient.reshape((num_phrases*num_tags, num_edges*num_tags)) + + def optimize(self): + ls = zeros(num_edges * num_tags) + #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) + ls = scipy.optimize.fmin_slsqp(self.objective, ls, + bounds=[(0, self.scale)] * num_edges * num_tags, + f_ieqcons=self.constraints, + fprime=self.gradient, + fprime_ieqcons=self.constraints_gradient, + iprint=0) # =2 for verbose + #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) + + # returns llh, kl and l1lmax contribution + l1lmax = 0 + index = 0 + for j, (phrase, edges) in enumerate(edges_phrase_to_context): + for t in range(num_tags): + lmax = None + for i, (context, count) in enumerate(edges): + lmax = max(lmax, self.q[index+i,t]) + l1lmax += lmax + index += len(edges) + + return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax + +class LocalDualObjective: + """ + Local part of objective, log(z) relevant to lambda_p**. + Optimised subject to lambda >= 0, sum_c lambda_pct <= scale forall t + """ + + def __init__(self, phraseId, scale): + self.phraseId = phraseId + self.scale = scale + edges = edges_phrase_to_context[self.phraseId][1] + self.posterior = zeros((len(edges), num_tags)) + self.q = zeros((len(edges), num_tags)) + self.llh = 0 + + for i, (context, count) in enumerate(edges): + for t in range(num_tags): + prob = tagDist[phraseId][t] + for j, token in enumerate(context): + prob *= contextWordDist[j][t][types[token]] + self.posterior[i,t] = prob + z = sum(self.posterior[i,:]) + self.posterior[i,:] /= z + self.llh += log(z) + + def objective(self, ls): + edges = edges_phrase_to_context[self.phraseId][1] + ls = ls.reshape((len(edges), num_tags)) + logz = 0 + + for i, (context, count) in enumerate(edges): + for t in range(num_tags): + self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t]) + local_z = sum(self.q[i,:]) + self.q[i,:] /= local_z + logz += log(local_z) * count + + return logz + + # FIXME: recomputes q many more times than necessary + + def gradient(self, ls): + edges = edges_phrase_to_context[self.phraseId][1] + ls = ls.reshape((len(edges), num_tags)) + gradient = zeros((len(edges), num_tags)) + + for i, (context, count) in enumerate(edges): + for t in range(num_tags): + self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t]) + local_z = sum(self.q[i,:]) + self.q[i,:] /= local_z + for t in range(num_tags): + gradient[i,t] -= self.q[i,t] * count + + return gradient.ravel() + + def constraints(self, ls): + edges = edges_phrase_to_context[self.phraseId][1] + ls = ls.reshape((len(edges), num_tags)) + cons = ones(num_tags) * self.scale + for t in range(num_tags): + for i, (context, count) in enumerate(edges): + cons[t] -= ls[i,t] + return cons + + def constraints_gradient(self, ls): + edges = edges_phrase_to_context[self.phraseId][1] + ls = ls.reshape((len(edges), num_tags)) + gradient = zeros((num_tags, len(edges), num_tags)) + for t in range(num_tags): + for i, (context, count) in enumerate(edges): + gradient[t,i,t] -= 1 + return gradient.reshape((num_tags, len(edges)*num_tags)) + + def optimize(self): + edges = edges_phrase_to_context[self.phraseId][1] + ls = zeros(len(edges) * num_tags) + #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) + ls = scipy.optimize.fmin_slsqp(self.objective, ls, + bounds=[(0, self.scale)] * len(edges) * num_tags, + f_ieqcons=self.constraints, + fprime=self.gradient, + fprime_ieqcons=self.constraints_gradient, + iprint=0) # =2 for verbose + #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba) + + # returns llh, kl and l1lmax contribution + l1lmax = 0 + for t in range(num_tags): + lmax = None + for i, (context, count) in enumerate(edges): + lmax = max(lmax, self.q[i,t]) + l1lmax += lmax + + return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax + +for iteration in range(20): + tagCounts = [zeros(num_tags) for p in range(num_phrases)] + contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)] + + # E-step + llh = kl = l1lmax = 0 + if False: + for p in range(num_phrases): + o = LocalDualObjective(p, delta) + #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0] + obj = o.optimize() + print '\tphrase', p, 'deltas', obj + llh += obj[0] + kl += obj[1] + l1lmax += obj[2] + + edges = edges_phrase_to_context[p][1] + for j, (context, count) in enumerate(edges): + for t in range(num_tags): + tagCounts[p][t] += count * o.q[j,t] + for i in range(4): + for t in range(num_tags): + contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t] + else: + o = GlobalDualObjective(delta) + obj = o.optimize() + llh, kl, l1lmax = o.optimize() + + for p, (phrase, edges) in enumerate(edges_phrase_to_context): + for j, (context, count) in enumerate(edges): + for t in range(num_tags): + tagCounts[p][t] += count * o.q[j,t] + for i in range(4): + for t in range(num_tags): + contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t] + + print 'iteration', iteration, 'objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax + + # M-step + for p in range(num_phrases): + tagDist[p] = normalise(tagCounts[p]) + for i in range(4): + for t in range(num_tags): + contextWordDist[i][t] = normalise(contextWordCounts[i][t]) + +for p, (phrase, ccs) in enumerate(edges_phrase_to_context): + for context, count in ccs: + conditionals = zeros(num_tags) + for t in range(num_tags): + prob = tagDist[p][t] + for i in range(4): + prob *= contextWordDist[i][t][types[context[i]]] + conditionals[t] = prob + cz = sum(conditionals) + conditionals /= cz + + print '%s\t%s ||| C=%d ||| %d |||' % (phrase, context, count, argmax(conditionals)), conditionals -- cgit v1.2.3