From e0265b2c2be3173341bdc2ffacdb6847b30890f5 Mon Sep 17 00:00:00 2001
From: "trevor.cohn" <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Tue, 13 Jul 2010 18:22:59 +0000
Subject: Cleaning up the PC model.

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@236 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 gi/posterior-regularisation/prjava/Makefile        |   2 +-
 gi/posterior-regularisation/prjava/src/arr/F.java  |   9 ++
 .../AbstractGradientBaseMethod.java                |   9 +-
 .../projections/SimplexProjection.java             |   2 +-
 .../prjava/src/phrase/PhraseCluster.java           |   3 +-
 .../prjava/src/phrase/PhraseContextObjective.java  | 155 ++++++++++-----------
 .../prjava/src/phrase/PhraseObjective.java         |   3 +-
 gi/posterior-regularisation/train_pr_global.py     |  45 +++---
 8 files changed, 118 insertions(+), 110 deletions(-)

diff --git a/gi/posterior-regularisation/prjava/Makefile b/gi/posterior-regularisation/prjava/Makefile
index abd9b964..a16adcde 100644
--- a/gi/posterior-regularisation/prjava/Makefile
+++ b/gi/posterior-regularisation/prjava/Makefile
@@ -1,5 +1,5 @@
 all:
-	ant
+	ant dist
 
 clean:
 	ant clean
diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java
index 54dadeac..79de5d1a 100644
--- a/gi/posterior-regularisation/prjava/src/arr/F.java
+++ b/gi/posterior-regularisation/prjava/src/arr/F.java
@@ -56,6 +56,7 @@ public class F {
 	}
 	
 	public static double l1norm(double a[]){
+		// FIXME: this isn't the l1 norm for a < 0
 		double norm=0;
 		for(int i=0;i<a.length;i++)
[...]
diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java
[...]
 		List<Future<?>> tasks = new ArrayList<Future<?>>();
-		//System.out.println("projectPoint: " + Arrays.toString(point));
+		//System.out.println("\t\tprojectPoint: " + Arrays.toString(point));
 		Arrays.fill(newPoint, 0, newPoint.length, 0);
-		if (scalePT > 0)
+		// first project using the phrase-tag constraints,
+		// for all p,t: sum_c lambda_ptc < scaleP
+		if (pool == null)
 		{
-			// first project using the phrase-tag constraints,
-			// for all p,t: sum_c lambda_ptc < scaleP
-			if (pool == null)
+			for (int p = 0; p < c.c.getNumPhrases(); ++p)
 			{
-				for (int p = 0; p < c.c.getNumPhrases(); ++p)
+				List<Edge> edges = c.c.getEdgesForPhrase(p);
+				double[] toProject = new double[edges.size()];
+				for(int tag=0;tag
[...]
-				List<Edge> edges = c.c.getEdgesForPhrase(p);
-				double[] toProject = new double[edges.size()];
-				for(int tag=0;tag
[...]
+				List<Edge> edges = c.c.getEdgesForPhrase(phrase);
+				double toProject[] = new double[edges.size()];
+				for(int tag=0;tag
[...]
-				List<Edge> edges = c.c.getEdgesForPhrase(phrase);
-				double toProject[] = new double[edges.size()];
-				for(int tag=0;tag
[...]
-		if (scaleCT > 1e-6)
+		// now project using the context-tag constraints,
+		// for all c,t: sum_p omega_pct < scaleC
+		if (pool == null)
 		{
-			// now project using the context-tag constraints,
-			// for all c,t: sum_p omega_pct < scaleC
-			if (pool == null)
+			for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
 			{
-				for (int ctx = 0; ctx < c.c.getNumContexts(); ++ctx)
+				List<Edge> edges = c.c.getEdgesForContext(ctx);
+				double toProject[] = new double[edges.size()];
+				for(int tag=0;tag
[...]
-				List<Edge> edges = c.c.getEdgesForContext(ctx);
-				double toProject[] = new double[edges.size()];
-				for(int tag=0;tag
[...]
+				List<Edge> edges = c.c.getEdgesForContext(context);
+				double toProject[] = new double[edges.size()];
+				for(int tag=0;tag
[...]
-				List<Edge> edges = c.c.getEdgesForContext(context);
-				double toProject[] = new double[edges.size()];
-				for(int tag=0;tag
[...]
diff --git a/gi/posterior-regularisation/train_pr_global.py b/gi/posterior-regularisation/train_pr_global.py
[...]
                 if s > best: best = s
             ct_l1linf += best
 
-        return llh, kl, pt_l1linf, ct_l1linf, llh + kl + delta * pt_l1linf + gamma * ct_l1linf
+        return llh, kl, pt_l1linf, ct_l1linf, llh - kl - delta * pt_l1linf - gamma * ct_l1linf
 
     def dual_deriv(ls):
         # d/dl log(z) = E_q[phi]
@@ -173,13 +175,13 @@ for iteration in range(20):
                 scores[t] = conditionals[t] * exp(-ls[li] - ls[omega_offset + li])
             local_z = sum(scores)
+            #print 'ddual', phrase, context, count, 'q =', scores / local_z
+            for t in range(num_tags):
-                if delta > 0:
-                    deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z
-                if gamma > 0:
-                    deriv[omega_offset + lamba_index[phrase,context] + t] -= count * scores[t] / local_z
+                deriv[lamba_index[phrase,context] + t] -= count * scores[t] / local_z
+                deriv[omega_offset + lamba_index[phrase,context] + t] -= count * scores[t] / local_z
 
-        print 'ddual', list(deriv)
+        #print 'ddual', list(deriv)
         return deriv
 
     def constraints(ls):
@@ -244,7 +246,7 @@ for iteration in range(20):
     print 'Post lambda optimisation dual', dual(lamba), 'primal', primal(lamba)
 
     # E-step
-    llh = z = 0
+    llh = log_z = 0
     for p, (phrase, ccs) in enumerate(edges_phrase_to_context):
         for context, count in ccs:
             conditionals = zeros(num_tags)
@@ -257,20 +259,21 @@ for iteration in range(20):
             conditionals /= cz
             llh += log(cz) * count
 
-            scores = zeros(num_tags)
+            q = zeros(num_tags)
             li = lamba_index[phrase, context]
             for t in range(num_tags):
-                scores[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t])
-            z += count * sum(scores)
+                q[t] = conditionals[t] * exp(-lamba[li + t] - lamba[omega_offset + li + t])
+            qz = sum(q)
+            log_z += count * log(qz)
 
             for t in range(num_tags):
-                tagCounts[p][t] += count * scores[t]
+                tagCounts[p][t] += count * q[t] / qz
 
             for i in range(4):
                 for t in range(num_tags):
-                    contextWordCounts[i][t][types[context[i]]] += count * scores[t]
+                    contextWordCounts[i][t][types[context[i]]] += count * q[t] / qz
 
-    print 'iteration', iteration, 'llh', llh, 'logz', log(z)
+    print 'iteration', iteration, 'llh', llh, 'logz', log_z
 
     # M-step
     for p in range(num_phrases):
-- 
cgit v1.2.3
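
The FIXME added to arr/F.java is accurate: l1norm sums the raw entries, returning sum_i a_i rather than sum_i |a_i|, so it is only a true l1 norm for non-negative input. A minimal sketch of the absolute-value version; this is a hypothetical fix, not part of the commit, which only flags the bug:

    // Hypothetical corrected l1 norm for arr/F.java: take absolute values
    // so vectors with negative entries are handled correctly.
    public class L1NormFix {
        public static double l1norm(double[] a) {
            double norm = 0;
            for (int i = 0; i < a.length; i++)
                norm += Math.abs(a[i]); // |a_i| rather than a_i
            return norm;
        }

        public static void main(String[] args) {
            // 1 + |-2| + 3 = 6.0
            System.out.println(l1norm(new double[] { 1.0, -2.0, 3.0 }));
        }
    }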
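The projectPoint refactor in PhraseContextObjective repeatedly copies one phrase-tag (or context-tag) block of dual variables into toProject and hands it to a SimplexProjection, enforcing sum_c lambda_ptc < scaleP and sum_p omega_pct < scaleC. The commit does not show that class's internals, so the sketch below is the standard O(n log n) sort-based Euclidean projection onto a scaled simplex (Duchi et al., 2008) that such a class typically implements; the class name, constructor, and in-place project(double[]) API are assumptions, not the project's actual interface:

    import java.util.Arrays;

    // Euclidean projection of v onto { x : x_i >= 0, sum_i x_i = scale },
    // assuming scale > 0. Standard sort-based algorithm: sort, find the
    // pivot index rho, shift everything by theta, and clamp at zero.
    public class SimplexProjectionSketch {
        private final double scale;

        public SimplexProjectionSketch(double scale) { this.scale = scale; }

        public void project(double[] v) {
            double[] mu = v.clone();
            Arrays.sort(mu); // ascending; read back-to-front for descending
            double cumOfTop = 0, running = 0;
            int rho = 0;
            for (int j = 1; j <= mu.length; j++) {
                double muJ = mu[mu.length - j]; // j-th largest entry
                running += muJ;
                if (muJ - (running - scale) / j > 0) { // still above the pivot
                    rho = j;
                    cumOfTop = running;
                }
            }
            double theta = (cumOfTop - scale) / rho;
            for (int i = 0; i < v.length; i++)
                v[i] = Math.max(v[i] - theta, 0);
        }

        public static void main(String[] args) {
            double[] x = { 0.5, 1.2, -0.3 };
            new SimplexProjectionSketch(1.0).project(x);
            System.out.println(Arrays.toString(x)); // ~[0.15, 0.85, 0.0], sums to 1
        }
    }

For the inequality form of the constraints above (sum below scale rather than equal to it), the usual variant first clamps negatives to zero and returns early when the clamped sum is already below scale, falling back to the equality projection otherwise.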
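In train_pr_global.py's E-step, the old code accumulated z += count * sum(scores) and printed log(z) at the end; the commit instead normalises each edge's posterior by qz = sum(q), accumulates the expected counts as count * q[t] / qz, and aggregates log_z += count * log(qz). A small self-contained illustration of the corrected per-edge update, in Java for consistency with the sketches above; the variable names mirror the Python but the numbers are made up:

    // One edge's corrected E-step update: q_t is proportional to
    // p(t|phrase,context) * exp(-lambda_t - omega_t), and is normalised
    // by qz before the expected counts are accumulated.
    public class EStepSketch {
        public static void main(String[] args) {
            double[] conditionals = { 0.7, 0.2, 0.1 }; // p(t | phrase, context)
            double[] lambda = { 0.5, 0.0, 0.0 };       // phrase-tag duals
            double[] omega = { 0.1, 0.0, 0.2 };        // context-tag duals
            double count = 2.0;                        // edge occurrence count
            int numTags = conditionals.length;

            double[] q = new double[numTags];
            double qz = 0;
            for (int t = 0; t < numTags; t++) {
                q[t] = conditionals[t] * Math.exp(-lambda[t] - omega[t]);
                qz += q[t];
            }
            double logZ = count * Math.log(qz); // contribution to log_z

            // expected tag counts use the normalised posterior q_t / qz;
            // the old code effectively dropped the division by qz
            double[] tagCounts = new double[numTags];
            for (int t = 0; t < numTags; t++)
                tagCounts[t] += count * q[t] / qz;

            System.out.println("log z += " + logZ);
            System.out.println("E[tag counts] = " + java.util.Arrays.toString(tagCounts));
        }
    }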