-rw-r--r--  gi/posterior-regularisation/train_pr_global.py   |   4
-rw-r--r--  gi/posterior-regularisation/train_pr_parallel.py | 320
2 files changed, 323 insertions(+), 1 deletion(-)
diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py
index 6ce7290d..8b80c6bc 100644
--- a/gi/posterior-regularisation/train_pr_global.py
+++ b/gi/posterior-regularisation/train_pr_global.py
@@ -263,7 +263,9 @@ for iteration in range(20):
                 scores[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t])
             z += count * sum(scores)
-            tagCounts[p] += count * scores
+            for t in range(num_tags):
+                tagCounts[p][t] += count * scores[t]
+
             for i in range(4):
                 for t in range(num_tags):
                     contextWordCounts[i][t][types[context[i]]] += count * scores[t]
diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py
new file mode 100644
index 00000000..72c5cf25
--- /dev/null
+++ b/gi/posterior-regularisation/train_pr_parallel.py
@@ -0,0 +1,320 @@
+import sys
+import scipy.optimize
+from numpy import *
+from numpy.random import random
+
+#
+# Step 1: load the concordance counts
+#
+
+edges_phrase_to_context = []
+edges_context_to_phrase = []
+types = {}
+context_types = {}
+num_edges = 0
+
+for line in sys.stdin:
+    phrase, rest = line.strip().split('\t')
+    parts = rest.split('|||')
+    edges_phrase_to_context.append((phrase, []))
+    for i in range(0, len(parts), 2):
+        context, count = parts[i:i+2]
+
+        ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split()))
+        cnt = int(count.strip()[2:])
+        edges_phrase_to_context[-1][1].append((ctx, cnt))
+
+        cid = context_types.get(ctx, len(context_types))
+        if cid == len(context_types):
+            context_types[ctx] = cid
+            edges_context_to_phrase.append((ctx, []))
+        edges_context_to_phrase[cid][1].append((phrase, cnt))
+
+        for token in ctx:
+            types.setdefault(token, len(types))
+        for token in phrase.split():
+            types.setdefault(token, len(types))
+
+        num_edges += 1
+
+print 'Read in', num_edges, 'edges and', len(types), 'word types'
+
+#
+# Step 2: initialise the model parameters
+#
+
+num_tags = 5
+num_types = len(types)
+num_phrases = len(edges_phrase_to_context)
+num_contexts = len(edges_context_to_phrase)
+delta = float(sys.argv[1])
+
+def normalise(a):
+    return a / float(sum(a))
+
+# Pr(tag | phrase)
+tagDist = [normalise(random(num_tags)+1) for p in range(num_phrases)]
+# Pr(context at pos i = w | tag) indexed by i, tag, word
+contextWordDist = [[normalise(random(num_types)+1) for t in range(num_tags)] for i in range(4)]
+
+#
+# Step 3: expectation maximisation
+#
+
+class GlobalDualObjective:
+    """
+    Objective, log(z), for all phrases s.t.
+    lambda >= 0, sum_c lambda_pct <= scale
+    """
+
+    def __init__(self, scale):
+        self.scale = scale
+        self.posterior = zeros((num_edges, num_tags))
+        self.q = zeros((num_edges, num_tags))
+        self.llh = 0
+
+        index = 0
+        for j, (phrase, edges) in enumerate(edges_phrase_to_context):
+            for context, count in edges:
+                for t in range(num_tags):
+                    prob = tagDist[j][t]
+                    for k, token in enumerate(context):
+                        prob *= contextWordDist[k][t][types[token]]
+                    self.posterior[index,t] = prob
+                z = sum(self.posterior[index,:])
+                self.posterior[index,:] /= z
+                self.llh += log(z)
+                index += 1
+
+    def objective(self, ls):
+        ls = ls.reshape((num_edges, num_tags))
+        logz = 0
+
+        index = 0
+        for j, (phrase, edges) in enumerate(edges_phrase_to_context):
+            for context, count in edges:
+                for t in range(num_tags):
+                    self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t])
+                local_z = sum(self.q[index,:])
+                self.q[index,:] /= local_z
+                logz += log(local_z) * count
+                index += 1
+
+        return logz
+
+    # FIXME: recomputes q many more times than necessary
+
+    def gradient(self, ls):
+        ls = ls.reshape((num_edges, num_tags))
+        gradient = zeros((num_edges, num_tags))
+
+        index = 0
+        for j, (phrase, edges) in enumerate(edges_phrase_to_context):
+            for context, count in edges:
+                for t in range(num_tags):
+                    self.q[index,t] = self.posterior[index,t] * exp(-ls[index,t])
+                local_z = sum(self.q[index,:])
+                self.q[index,:] /= local_z
+                for t in range(num_tags):
+                    gradient[index,t] -= self.q[index,t] * count
+                index += 1
+
+        return gradient.ravel()
+
+    def constraints(self, ls):
+        ls = ls.reshape((num_edges, num_tags))
+        cons = ones((num_phrases, num_tags)) * self.scale
+        index = 0
+        for j, (phrase, edges) in enumerate(edges_phrase_to_context):
+            for i, (context, count) in enumerate(edges):
+                for t in range(num_tags):
+                    cons[j,t] -= ls[index,t]
+                index += 1
+        return cons.ravel()
+
+    def constraints_gradient(self, ls):
+        ls = ls.reshape((num_edges, num_tags))
+        gradient = zeros((num_phrases, num_tags, num_edges, num_tags))
+        index = 0
+        for j, (phrase, edges) in enumerate(edges_phrase_to_context):
+            for i, (context, count) in enumerate(edges):
+                for t in range(num_tags):
+                    gradient[j,t,index,t] -= 1
+                index += 1
+        return gradient.reshape((num_phrases*num_tags, num_edges*num_tags))
+
+    def optimize(self):
+        ls = zeros(num_edges * num_tags)
+        #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
+        ls = scipy.optimize.fmin_slsqp(self.objective, ls,
+                                bounds=[(0, self.scale)] * num_edges * num_tags,
+                                f_ieqcons=self.constraints,
+                                fprime=self.gradient,
+                                fprime_ieqcons=self.constraints_gradient,
+                                iprint=0) # =2 for verbose
+        #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
+
+        # returns llh, kl and l1lmax contribution
+        l1lmax = 0
+        index = 0
+        for j, (phrase, edges) in enumerate(edges_phrase_to_context):
+            for t in range(num_tags):
+                lmax = None
+                for i, (context, count) in enumerate(edges):
+                    lmax = max(lmax, self.q[index+i,t])
+                l1lmax += lmax
+            index += len(edges)
+
+        return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax
+
+class LocalDualObjective:
+    """
+    Local part of objective, log(z) relevant to lambda_p**.
+    Optimised subject to lambda >= 0, sum_c lambda_pct <= scale forall t
+    """
+
+    def __init__(self, phraseId, scale):
+        self.phraseId = phraseId
+        self.scale = scale
+        edges = edges_phrase_to_context[self.phraseId][1]
+        self.posterior = zeros((len(edges), num_tags))
+        self.q = zeros((len(edges), num_tags))
+        self.llh = 0
+
+        for i, (context, count) in enumerate(edges):
+            for t in range(num_tags):
+                prob = tagDist[phraseId][t]
+                for j, token in enumerate(context):
+                    prob *= contextWordDist[j][t][types[token]]
+                self.posterior[i,t] = prob
+            z = sum(self.posterior[i,:])
+            self.posterior[i,:] /= z
+            self.llh += log(z)
+
+    def objective(self, ls):
+        edges = edges_phrase_to_context[self.phraseId][1]
+        ls = ls.reshape((len(edges), num_tags))
+        logz = 0
+
+        for i, (context, count) in enumerate(edges):
+            for t in range(num_tags):
+                self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t])
+            local_z = sum(self.q[i,:])
+            self.q[i,:] /= local_z
+            logz += log(local_z) * count
+
+        return logz
+
+    # FIXME: recomputes q many more times than necessary
+
+    def gradient(self, ls):
+        edges = edges_phrase_to_context[self.phraseId][1]
+        ls = ls.reshape((len(edges), num_tags))
+        gradient = zeros((len(edges), num_tags))
+
+        for i, (context, count) in enumerate(edges):
+            for t in range(num_tags):
+                self.q[i,t] = self.posterior[i,t] * exp(-ls[i,t])
+            local_z = sum(self.q[i,:])
+            self.q[i,:] /= local_z
+            for t in range(num_tags):
+                gradient[i,t] -= self.q[i,t] * count
+
+        return gradient.ravel()
+
+    def constraints(self, ls):
+        edges = edges_phrase_to_context[self.phraseId][1]
+        ls = ls.reshape((len(edges), num_tags))
+        cons = ones(num_tags) * self.scale
+        for t in range(num_tags):
+            for i, (context, count) in enumerate(edges):
+                cons[t] -= ls[i,t]
+        return cons
+
+    def constraints_gradient(self, ls):
+        edges = edges_phrase_to_context[self.phraseId][1]
+        ls = ls.reshape((len(edges), num_tags))
+        gradient = zeros((num_tags, len(edges), num_tags))
+        for t in range(num_tags):
+            for i, (context, count) in enumerate(edges):
+                gradient[t,i,t] -= 1
+        return gradient.reshape((num_tags, len(edges)*num_tags))
+
+    def optimize(self):
+        edges = edges_phrase_to_context[self.phraseId][1]
+        ls = zeros(len(edges) * num_tags)
+        #print '\tpre lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
+        ls = scipy.optimize.fmin_slsqp(self.objective, ls,
+                                bounds=[(0, self.scale)] * len(edges) * num_tags,
+                                f_ieqcons=self.constraints,
+                                fprime=self.gradient,
+                                fprime_ieqcons=self.constraints_gradient,
+                                iprint=0) # =2 for verbose
+        #print '\tpost lambda optimisation dual', self.objective(ls) #, 'primal', primal(lamba)
+
+        # returns llh, kl and l1lmax contribution
+        l1lmax = 0
+        for t in range(num_tags):
+            lmax = None
+            for i, (context, count) in enumerate(edges):
+                lmax = max(lmax, self.q[i,t])
+            l1lmax += lmax
+
+        return self.llh, -self.objective(ls) + dot(ls, self.gradient(ls)), l1lmax
+
+for iteration in range(20):
+    tagCounts = [zeros(num_tags) for p in range(num_phrases)]
+    contextWordCounts = [[zeros(num_types) for t in range(num_tags)] for i in range(4)]
+
+    # E-step
+    llh = kl = l1lmax = 0
+    if False:
+        for p in range(num_phrases):
+            o = LocalDualObjective(p, delta)
+            #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0]
+            obj = o.optimize()
+            print '\tphrase', p, 'deltas', obj
+            llh += obj[0]
+            kl += obj[1]
+            l1lmax += obj[2]
+
+            edges = edges_phrase_to_context[p][1]
+            for j, (context, count) in enumerate(edges):
+                for t in range(num_tags):
+                    tagCounts[p][t] += count * o.q[j,t]
+                for i in range(4):
+                    for t in range(num_tags):
+                        contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
+    else:
+        o = GlobalDualObjective(delta)
+        obj = o.optimize()
+        llh, kl, l1lmax = obj
+
+        for p, (phrase, edges) in enumerate(edges_phrase_to_context):
+            for j, (context, count) in enumerate(edges):
+                for t in range(num_tags):
+                    tagCounts[p][t] += count * o.q[j,t]
+                for i in range(4):
+                    for t in range(num_tags):
+                        contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
+
+    print 'iteration', iteration, 'objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
+
+    # M-step
+    for p in range(num_phrases):
+        tagDist[p] = normalise(tagCounts[p])
+    for i in range(4):
+        for t in range(num_tags):
+            contextWordDist[i][t] = normalise(contextWordCounts[i][t])
+
+for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
+    for context, count in ccs:
+        conditionals = zeros(num_tags)
+        for t in range(num_tags):
+            prob = tagDist[p][t]
+            for i in range(4):
+                prob *= contextWordDist[i][t][types[context[i]]]
+            conditionals[t] = prob
+        cz = sum(conditionals)
+        conditionals /= cz
+
+        print '%s\t%s ||| C=%d ||| %d |||' % (phrase, context, count, argmax(conditionals)), conditionals
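
Note: the two *DualObjective classes minimise the same projected E-step dual; only the block structure of the constraints differs (one phrase at a time vs. all phrases jointly). As a reading aid, here is a sketch of the problem the objective/gradient/constraints methods hand to fmin_slsqp, in assumed notation that does not appear in the commit (q is the projected posterior, n_pc the concordance count, delta the scale from the command line):

    % sketch of the dual solved above (notation assumed)
    \begin{align*}
      q_{pc}(t) &\propto p_\theta(t \mid p, c)\, e^{-\lambda_{pct}} \\
      \min_{\lambda \ge 0}\ D(\lambda) &= \sum_{(p,c)} n_{pc} \log \sum_t p_\theta(t \mid p, c)\, e^{-\lambda_{pct}}
        \quad \text{s.t.} \quad \sum_c \lambda_{pct} \le \delta \;\; \forall\, p, t
    \end{align*}

The KL term each optimize() returns is then recovered from the dual as -D(lambda) + lambda . grad D(lambda), which is exactly the `-self.objective(ls) + dot(ls, self.gradient(ls))` expression at the end of both methods.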
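
Since the script reads its concordance from stdin, a minimal sketch of the expected input may also help; the format is inferred from the parser in the diff, and the line content, file name, and delta value below are hypothetical:

    # Hypothetical one-line concordance for train_pr_parallel.py (Python 2,
    # matching the script). Each context carries a <PHRASE> slot marker that
    # the parser strips, leaving the four context positions the model scores.
    line = 'le chat\tthe cat <PHRASE> sat on ||| C=3 ||| a cat <PHRASE> slept on ||| C=1'

    phrase, rest = line.strip().split('\t')
    parts = rest.split('|||')
    for i in range(0, len(parts), 2):
        context, count = parts[i:i+2]
        ctx = tuple(filter(lambda x: x != '<PHRASE>', context.split()))
        cnt = int(count.strip()[2:])   # drops the 'C=' prefix
        print ctx, cnt                 # ('the', 'cat', 'sat', 'on') 3, then ('a', 'cat', 'slept', 'on') 1

    # Invocation, with delta (the l1lmax scale) as the only argument:
    #   python train_pr_parallel.py 0.5 < concordance.txt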
