summaryrefslogtreecommitdiff
path: root/gi/posterior-regularisation/train_pr_parallel.py
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@cs.cmu.edu> 2012-10-11 14:06:32 -0400
committer: Chris Dyer <cdyer@cs.cmu.edu> 2012-10-11 14:06:32 -0400
commit: 07ea7b64b6f85e5798a8068453ed9fd2b97396db (patch)
tree: 644496a1690d84d82a396bbc1e39160788beb2cd /gi/posterior-regularisation/train_pr_parallel.py
parent: 37b9e45e5cb29d708f7249dbe0b0fb27685282a0 (diff)
parent: a36fcc5d55c1de84ae68c1091ebff2b1c32dc3b7 (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'gi/posterior-regularisation/train_pr_parallel.py')
-rw-r--r-- gi/posterior-regularisation/train_pr_parallel.py 333
1 files changed, 0 insertions, 333 deletions
diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py
deleted file mode 100644
index 3b9cefed..00000000
--- a/gi/posterior-regularisation/train_pr_parallel.py
+++ /dev/null
@@ -1,333 +0,0 @@
-import sys
-import scipy.optimize
-from numpy import *
-from numpy.random import random, seed
-
-#
-# Step 1: load the concordance counts
-#
-
# Edge lists in both directions.  Each entry is (key, [(other_end, count), ...]).
edges_phrase_to_context = []
edges_context_to_phrase = []
# Dense integer ids for word types and for whole contexts, assigned in
# first-seen order.
types = {}
context_types = {}
num_edges = 0

# Each input line is "<phrase>\t<context> ||| C=<n> ||| <context> ||| C=<n> ...".
for line in sys.stdin:
    phrase, rest = line.strip().split('\t')
    fields = rest.split('|||')
    phrase_edges = []
    edges_phrase_to_context.append((phrase, phrase_edges))

    # Fields alternate: context, count, context, count, ...
    for start in range(0, len(fields), 2):
        raw_context, raw_count = fields[start:start + 2]

        # Drop the placeholder marking where the phrase sits in the context.
        ctx = tuple(tok for tok in raw_context.split() if tok != '<PHRASE>')
        # The count field looks like "C=<n>"; skip its two-character prefix.
        cnt = int(raw_count.strip()[2:])
        phrase_edges.append((ctx, cnt))

        # First sighting of this context: give it the next id and an empty
        # phrase list at that position.
        if ctx not in context_types:
            context_types[ctx] = len(context_types)
            edges_context_to_phrase.append((ctx, []))
        edges_context_to_phrase[context_types[ctx]][1].append((phrase, cnt))

        # Register word types: context tokens first, then phrase tokens.
        # The order fixes the ids, so it must not change.
        for token in ctx:
            types.setdefault(token, len(types))
        for token in phrase.split():
            types.setdefault(token, len(types))

        num_edges += 1
-
-#
-# Step 2: initialise the model parameters
-#
-
# Model dimensions.
num_tags = 25
num_types = len(types)
num_phrases = len(edges_phrase_to_context)
num_contexts = len(edges_context_to_phrase)

# Command line: DELTA (local|global) [SEED]
# Validate explicitly rather than with `assert`, which is stripped under -O
# and would silently fall through to global mode.
if len(sys.argv) < 3 or sys.argv[2] not in ('local', 'global'):
    sys.exit('usage: train_pr_parallel.py DELTA (local|global) [SEED] < concordance')

delta = float(sys.argv[1])
local = sys.argv[2] == 'local'
# The seed is the optional third argument, so argv needs at least four
# entries.  The old guard (>= 2) was always true and raised IndexError
# whenever the seed was omitted.
if len(sys.argv) >= 4:
    seed(int(sys.argv[3]))

# Single-argument print(...) produces the same text under the Python 2 print
# statement and the Python 3 print function.
print(' '.join(str(x) for x in (
    'Read in', num_edges, 'edges', num_phrases, 'phrases',
    num_contexts, 'contexts and', len(types), 'word types')))
-
def normalise(a):
    """Return ``a`` divided by the sum of its entries."""
    total = float(sum(a))
    return a / total
-
# Pr(tag | phrase): one distribution over tags per phrase.  Each starts as
# uniform noise shifted by one and then normalised.
tagDist = []
for _phrase_index in range(num_phrases):
    tagDist.append(normalise(random(num_tags) + 1))

# Pr(context at pos i = w | tag), indexed by position i, tag, word id.
# Draw order (position outermost, tag innermost) matters for a fixed seed.
contextWordDist = []
for _position in range(4):
    _per_tag = []
    for _tag in range(num_tags):
        _per_tag.append(normalise(random(num_types) + 1))
    contextWordDist.append(_per_tag)
-
-#
-# Step 3: expectation maximisation
-#
-
class GlobalDualObjective:
    """
    Objective, log(z), for all phrases s.t. lambda >= 0, sum_c lambda_pct <= scale

    Rows of ``posterior`` and ``q`` are edges, in the order they appear when
    walking ``edges_phrase_to_context``; columns are tags.  ``posterior`` is
    the model posterior over tags for each edge, ``q`` the posterior
    reweighted by exp(-lambda) and renormalised, and ``llh`` the
    count-weighted log-likelihood of the edges under the current model.
    """

    def __init__(self, scale):
        """Compute the model posterior and log-likelihood for every edge.

        scale -- upper bound used both for each individual lambda and for
                 the count-weighted per-(phrase, tag) sum constraint.
        """
        self.scale = scale
        self.posterior = zeros((num_edges, num_tags))
        self.q = zeros((num_edges, num_tags))
        self.llh = 0
        # Per-edge counts and, per phrase, the [start, end) row range of its
        # edges; these let the methods below work on whole arrays.
        self._counts = zeros(num_edges)
        self._spans = []
        # The constraint Jacobian does not depend on lambda; built on first use.
        self._constraints_jacobian = None

        index = 0
        for j, (phrase, edges) in enumerate(edges_phrase_to_context):
            self._spans.append((index, index + len(edges)))
            for context, count in edges:
                for t in range(num_tags):
                    prob = tagDist[j][t]
                    for k, token in enumerate(context):
                        prob *= contextWordDist[k][t][types[token]]
                    self.posterior[index, t] = prob
                z = self.posterior[index].sum()
                self.posterior[index] /= z
                self.llh += log(z) * count
                self._counts[index] = count
                index += 1

    def _update_q(self, ls):
        """Recompute ``self.q`` for multipliers ``ls``; return per-edge normalisers."""
        ls = ls.reshape((num_edges, num_tags))
        unnormalised = self.posterior * exp(-ls)
        local_z = unnormalised.sum(axis=1)
        self.q[:] = unnormalised / local_z[:, newaxis]
        return local_z

    def objective(self, ls):
        """Return sum over edges of count * log(z); updates ``self.q`` as a side effect."""
        return dot(self._counts, log(self._update_q(ls)))

    def gradient(self, ls):
        """Return d objective / d lambda (= -count * q), flattened; updates ``self.q``."""
        self._update_q(ls)
        return (-(self.q * self._counts[:, newaxis])).ravel()

    def constraints(self, ls):
        """Return scale - sum_c count * lambda for each (phrase, tag), flattened.

        Each entry must be >= 0 for ``ls`` to be feasible.
        """
        ls = ls.reshape((num_edges, num_tags))
        weighted = ls * self._counts[:, newaxis]
        cons = ones((num_phrases, num_tags)) * self.scale
        for j, (start, end) in enumerate(self._spans):
            cons[j] -= weighted[start:end].sum(axis=0)
        return cons.ravel()

    def constraints_gradient(self, ls):
        """Return the Jacobian of ``constraints``.

        Shape is (num_phrases*num_tags, num_edges*num_tags).  The Jacobian is
        constant in ``ls``, so it is built once and the cached array is
        returned afterwards; callers must treat it as read-only.
        """
        if self._constraints_jacobian is None:
            jac = zeros((num_phrases, num_tags, num_edges, num_tags))
            for j, (start, end) in enumerate(self._spans):
                for t in range(num_tags):
                    jac[j, t, start:end, t] = -self._counts[start:end]
            self._constraints_jacobian = jac.reshape(
                (num_phrases * num_tags, num_edges * num_tags))
        return self._constraints_jacobian

    def optimize(self):
        """Minimise the dual with SLSQP, starting from lambda = 0.

        Returns (llh, kl, l1lmax): the model log-likelihood, the KL term
        -objective + lambda . gradient, and the sum over (phrase, tag) of the
        largest q among that phrase's edges.  On return ``self.q`` holds the
        distribution at the optimised multipliers.
        """
        ls = zeros(num_edges * num_tags)
        ls = scipy.optimize.fmin_slsqp(self.objective, ls,
                bounds=[(0, self.scale)] * (num_edges * num_tags),
                f_ieqcons=self.constraints,
                fprime=self.gradient,
                fprime_ieqcons=self.constraints_gradient,
                iprint=0) # =2 for verbose

        # Re-evaluate at the returned multipliers *before* reading self.q;
        # otherwise q is whatever point the optimiser probed last.
        dual = self.objective(ls)
        kl = -dual + dot(ls, self.gradient(ls))

        # l1lmax contribution: for each phrase and tag, the max of q over the
        # phrase's edges.  Array max avoids the Python-2-only max(None, x).
        l1lmax = 0
        for start, end in self._spans:
            if end > start:
                l1lmax += self.q[start:end].max(axis=0).sum()

        return self.llh, kl, l1lmax
-
class LocalDualObjective:
    """
    Local part of objective, log(z) relevant to lambda_p**.
    Optimised subject to lambda >= 0, sum_c lambda_pct <= scale forall t

    Rows of ``posterior`` and ``q`` are the edges of phrase ``phraseId`` in
    their stored order; columns are tags.  ``llh`` is the count-weighted
    log-likelihood of those edges under the current model.
    """

    def __init__(self, phraseId, scale):
        """Compute the model posterior and log-likelihood for one phrase's edges.

        phraseId -- index into ``edges_phrase_to_context``.
        scale    -- upper bound used both for each individual lambda and for
                    the count-weighted per-tag sum constraint.
        """
        self.phraseId = phraseId
        self.scale = scale
        edges = edges_phrase_to_context[self.phraseId][1]
        self.posterior = zeros((len(edges), num_tags))
        self.q = zeros((len(edges), num_tags))
        self.llh = 0
        # Per-edge counts as an array so the methods below can be vectorised.
        self._counts = array([count for context, count in edges], dtype=float)
        # The constraint Jacobian does not depend on lambda; built on first use.
        self._constraints_jacobian = None

        for i, (context, count) in enumerate(edges):
            for t in range(num_tags):
                prob = tagDist[phraseId][t]
                for j, token in enumerate(context):
                    prob *= contextWordDist[j][t][types[token]]
                self.posterior[i, t] = prob
            z = self.posterior[i].sum()
            self.posterior[i] /= z
            self.llh += log(z) * count

    def _update_q(self, ls):
        """Recompute ``self.q`` for multipliers ``ls``; return per-edge normalisers."""
        ls = ls.reshape(self.posterior.shape)
        unnormalised = self.posterior * exp(-ls)
        local_z = unnormalised.sum(axis=1)
        self.q[:] = unnormalised / local_z[:, newaxis]
        return local_z

    def objective(self, ls):
        """Return sum over edges of count * log(z); updates ``self.q`` as a side effect."""
        return dot(self._counts, log(self._update_q(ls)))

    def gradient(self, ls):
        """Return d objective / d lambda (= -count * q), flattened; updates ``self.q``."""
        self._update_q(ls)
        return (-(self.q * self._counts[:, newaxis])).ravel()

    def constraints(self, ls):
        """Return scale - sum_c count * lambda for each tag; feasible when all >= 0."""
        ls = ls.reshape(self.posterior.shape)
        return ones(num_tags) * self.scale - dot(self._counts, ls)

    def constraints_gradient(self, ls):
        """Return the Jacobian of ``constraints``, shape (num_tags, len(edges)*num_tags).

        The Jacobian is constant in ``ls``, so it is built once and the cached
        array is returned afterwards; callers must treat it as read-only.
        """
        if self._constraints_jacobian is None:
            num_local_edges = self.posterior.shape[0]
            jac = zeros((num_tags, num_local_edges, num_tags))
            for t in range(num_tags):
                jac[t, :, t] = -self._counts
            self._constraints_jacobian = jac.reshape(
                (num_tags, num_local_edges * num_tags))
        return self._constraints_jacobian

    def optimize(self, ls=None):
        """Minimise the local dual with SLSQP.

        ls -- optional flat array of starting multipliers (e.g. the result of
              a previous call); defaults to all zeros.

        Returns (llh, kl, l1lmax, ls): the model log-likelihood, the KL term
        -objective + lambda . gradient, the sum over tags of the largest q
        among this phrase's edges, and the optimised multipliers.  On return
        ``self.q`` holds the distribution at those multipliers.
        """
        edges = edges_phrase_to_context[self.phraseId][1]
        # Identity test: `ls == None` on an ndarray is an elementwise
        # comparison and cannot be used as a condition.
        if ls is None:
            ls = zeros(len(edges) * num_tags)
        ls = scipy.optimize.fmin_slsqp(self.objective, ls,
                bounds=[(0, self.scale)] * (len(edges) * num_tags),
                f_ieqcons=self.constraints,
                fprime=self.gradient,
                fprime_ieqcons=self.constraints_gradient,
                iprint=0) # =2 for verbose

        # Re-evaluate at the returned multipliers *before* reading self.q;
        # otherwise q is whatever point the optimiser probed last.
        dual = self.objective(ls)
        kl = -dual + dot(ls, self.gradient(ls))

        # l1lmax contribution: per tag, the max of q over this phrase's edges.
        # Array max avoids the Python-2-only max(None, x).
        l1lmax = 0
        if len(edges) > 0:
            l1lmax = self.q.max(axis=0).sum()

        return self.llh, kl, l1lmax, ls
-
# Per-phrase multipliers carried across iterations so that each local
# optimisation is warm-started from the previous iteration's solution.
ls = [None] * num_phrases
for iteration in range(20):
    # Expected counts accumulated during the E-step.
    tagCounts = [zeros(num_tags) for p in range(num_phrases)]
    contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)]

    # E-step
    llh = kl = l1lmax = 0
    if local:
        # One independent constrained optimisation per phrase.
        for p in range(num_phrases):
            o = LocalDualObjective(p, delta)
            obj = o.optimize(ls[p])
            llh += obj[0]
            kl += obj[1]
            l1lmax += obj[2]
            ls[p] = obj[3]

            edges = edges_phrase_to_context[p][1]
            for j, (context, count) in enumerate(edges):
                for t in range(num_tags):
                    tagCounts[p][t] += count * o.q[j, t]
                for i in range(4):
                    for t in range(num_tags):
                        contextWordCounts[i][t][types[context[i]]] += count * o.q[j, t]
    else:
        # One joint optimisation over all edges.  optimize() is the expensive
        # step, so it is called exactly once; the previous code called it
        # twice and discarded the first result.
        o = GlobalDualObjective(delta)
        llh, kl, l1lmax = o.optimize()

        index = 0
        for p, (phrase, edges) in enumerate(edges_phrase_to_context):
            for context, count in edges:
                for t in range(num_tags):
                    tagCounts[p][t] += count * o.q[index, t]
                for i in range(4):
                    for t in range(num_tags):
                        contextWordCounts[i][t][types[context[i]]] += count * o.q[index, t]
                index += 1

    # Single-argument print(...) produces the same text under the Python 2
    # print statement and the Python 3 print function.
    print(' '.join(str(x) for x in (
        'iteration', iteration, 'objective', (llh - kl - delta * l1lmax),
        'llh', llh, 'kl', kl, 'l1lmax', l1lmax)))

    # M-step: renormalise the expected counts into the new distributions.
    for p in range(num_phrases):
        tagDist[p] = normalise(tagCounts[p])
    for i in range(4):
        for t in range(num_tags):
            contextWordDist[i][t] = normalise(contextWordCounts[i][t])
-
# Emit, for every (phrase, context) edge, the most probable tag under the
# trained model together with the full conditional distribution over tags.
for phrase_id, entry in enumerate(edges_phrase_to_context):
    phrase_text, phrase_edges = entry
    for ctx_tokens, _edge_count in phrase_edges:
        # Word id at each of the four context positions.
        word_ids = [types[ctx_tokens[pos]] for pos in range(4)]

        scores = zeros(num_tags)
        for tag in range(num_tags):
            score = tagDist[phrase_id][tag]
            for pos, word_id in enumerate(word_ids):
                score *= contextWordDist[pos][tag][word_id]
            scores[tag] = score
        scores /= sum(scores)

        header = '%s\t%s ||| C=%d |||' % (phrase_text, ctx_tokens, argmax(scores))
        print(header + ' ' + str(scores))