author     trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f>   2010-07-01 01:30:16 +0000
committer  trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f>   2010-07-01 01:30:16 +0000
commit     6ea9bd3aa600bce5224fa97dae79bee6f40699a2 (patch)
tree       c4749b21990697bdd0533dab01c18611e6cc5a90 /gi/posterior-regularisation/train_pr_parallel.py
parent     eb9fc0e76ff9708783bd9483a68e28d70fb2bcd2 (diff)
Verified that the phrase-tag L1Lmax constraints can be optimised independently for each phrase.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@86 ec762483-ff6d-05da-a07a-a48fb63a330f
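
The independence claim follows from the structure of the penalty: the l1lmax term sums, over each phrase-tag pair, the maximum posterior across that phrase's context edges, so its value (and hence its dual optimisation) splits into one term per phrase. Below is a minimal numpy sketch of that decomposition, not code from the repository; the names (`q_blocks`, `owner`, `rng`) and the block sizes are illustrative, with each block shaped (edges x tags) to mirror how `o.q[j,t]` is indexed in the script.

```python
import numpy as np

rng = np.random.default_rng(0)
num_tags = 5

# Hypothetical tag posteriors: one (num_edges x num_tags) block per phrase,
# mirroring how the local objective indexes o.q[j, t] by that phrase's edges.
q_blocks = [rng.dirichlet(np.ones(num_tags), size=int(rng.integers(2, 6)))
            for _ in range(3)]

# Global view: stack every edge of every phrase into one matrix and remember
# which phrase owns each row, as a single running edge index would.
q_all = np.vstack(q_blocks)
owner = np.concatenate([[p] * len(q) for p, q in enumerate(q_blocks)])

# l1lmax computed from the stacked matrix: for each (phrase, tag) pair, take
# the max over that phrase's rows, then sum over all pairs.
global_l1lmax = sum(q_all[owner == p].max(axis=0).sum()
                    for p in range(len(q_blocks)))

# The same quantity computed phrase by phrase, each block in isolation.
local_l1lmax = sum(q.max(axis=0).sum() for q in q_blocks)

# The two agree because each (phrase, tag) max only sees that phrase's edges.
assert np.isclose(global_l1lmax, local_l1lmax)
```

This additivity is what lets the `local` branch in the diff below optimise one `LocalDualObjective` per phrase and still arrive at the same penalty value as the joint `GlobalDualObjective`.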
Diffstat (limited to 'gi/posterior-regularisation/train_pr_parallel.py')
-rw-r--r--   gi/posterior-regularisation/train_pr_parallel.py | 18
1 file changed, 13 insertions(+), 5 deletions(-)
```diff
diff --git a/gi/posterior-regularisation/train_pr_parallel.py b/gi/posterior-regularisation/train_pr_parallel.py
index 72c5cf25..d5df87b5 100644
--- a/gi/posterior-regularisation/train_pr_parallel.py
+++ b/gi/posterior-regularisation/train_pr_parallel.py
@@ -1,7 +1,7 @@
 import sys
 import scipy.optimize
 from numpy import *
-from numpy.random import random
+from numpy.random import random, seed
 
 #
 # Step 1: load the concordance counts
@@ -48,6 +48,10 @@
 num_types = len(types)
 num_phrases = len(edges_phrase_to_context)
 num_contexts = len(edges_context_to_phrase)
 delta = float(sys.argv[1])
+assert sys.argv[2] in ('local', 'global')
+local = sys.argv[2] == 'local'
+if len(sys.argv) >= 2:
+    seed(int(sys.argv[3]))
 def normalise(a):
     return a / float(sum(a))
@@ -267,7 +271,7 @@ for iteration in range(20):
     # E-step
     llh = kl = l1lmax = 0
 
-    if False:
+    if local:
         for p in range(num_phrases):
             o = LocalDualObjective(p, delta)
             #print '\toptimising lambda for phrase', p, '=', edges_phrase_to_context[p][0]
@@ -284,18 +288,22 @@ for iteration in range(20):
                 for i in range(4):
                     for t in range(num_tags):
                         contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
+
+        #print 'iteration', iteration, 'LOCAL objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
     else:
         o = GlobalDualObjective(delta)
         obj = o.optimize()
         llh, kl, l1lmax = o.optimize()
+        index = 0
         for p, (phrase, edges) in enumerate(edges_phrase_to_context):
-            for j, (context, count) in enumerate(edges):
+            for context, count in edges:
                 for t in range(num_tags):
-                    tagCounts[p][t] += count * o.q[j,t]
+                    tagCounts[p][t] += count * o.q[index,t]
                 for i in range(4):
                     for t in range(num_tags):
-                        contextWordCounts[i][t][types[context[i]]] += count * o.q[j,t]
+                        contextWordCounts[i][t][types[context[i]]] += count * o.q[index,t]
+                index += 1
 
     print 'iteration', iteration, 'objective', (llh + kl + delta * l1lmax), 'llh', llh, 'kl', kl, 'l1lmax', l1lmax
```
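
The change wires the choice of solver to the command line: `sys.argv[1]` is the constraint scale delta, `sys.argv[2]` selects 'local' or 'global', and `sys.argv[3]` seeds numpy's RNG (the `len(sys.argv) >= 2` guard is always true once the assert on `sys.argv[2]` has passed, so the seed argument is effectively required). In the global branch the per-phrase edge index `j` is replaced by a running `index`, presumably because the global objective's `o.q` stacks the edges of every phrase into one matrix. A hypothetical invocation consistent with that argument parsing, with made-up values:

```sh
python train_pr_parallel.py 0.5 local 42
```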