Merge branch 'master' of github.com:redpony/cdec

author: Kenneth Heafield <github@kheafield.com> 2012-08-03 07:46:54 -0400
committer: Kenneth Heafield <github@kheafield.com> 2012-08-03 07:46:54 -0400
commit: be1ab0a8937f9c5668ea5e6c31b798e87672e55e (patch)
tree: a13aad60ab6cced213401bce6a38ac885ba171ba /python/test.py
parent: e5d6f4ae41009c26978ecd62668501af9762b0bc (diff)
parent: 9fe0219562e5db25171cce8776381600ff9a5649 (diff)
1 files changed, 29 insertions, 8 deletions
diff --git a/python/test.py b/python/test.py
index 1542dd4f..eb9e6a95 100644
--- a/python/test.py
+++ b/python/test.py
@@ -2,12 +2,11 @@
 import cdec
 import gzip
 
-config = 'formalism=scfg'
 weights = '../tests/system_tests/australia/weights'
 grammar_file = '../tests/system_tests/australia/australia.scfg.gz'
 
 # Load decoder with configuration
-decoder = cdec.Decoder(config)
+decoder = cdec.Decoder(formalism='scfg')
 # Read weights
 decoder.read_weights(weights)
 
@@ -19,24 +18,36 @@ with gzip.open(grammar_file) as f:
 
 # Input sentence
 sentence = u'澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。'
-print 'Input:', sentence
+print '    Input:', sentence.encode('utf8')
 
 # Decode
 forest = decoder.translate(sentence, grammar=grammar)
 
 # Get viterbi translation
 print 'Output[0]:', forest.viterbi().encode('utf8')
-print '  Tree[0]:', forest.viterbi_tree().encode('utf8')
+f_tree, e_tree = forest.viterbi_trees()
+print ' FTree[0]:', f_tree.encode('utf8')
+print ' ETree[0]:', e_tree.encode('utf8')
+print 'LgProb[0]:', forest.viterbi_features().dot(decoder.weights)
 
 # Get k-best translations
-for i, (sentence, tree) in enumerate(zip(forest.kbest(5), forest.kbest_tree(5)), 1):
+kbest = zip(forest.kbest(5), forest.kbest_trees(5), forest.kbest_features(5))
+for i, (sentence, (f_tree, e_tree), features) in enumerate(kbest, 1):
     print 'Output[%d]:' % i, sentence.encode('utf8')
-    print '  Tree[%d]:' % i, tree.encode('utf8')
+    print ' FTree[%d]:' % i, f_tree.encode('utf8')
+    print ' ETree[%d]:' % i, e_tree.encode('utf8')
+    print ' FVect[%d]:' % i, dict(features)
 
 # Sample translations from the forest
 for sentence in forest.sample(5):
     print 'Sample:', sentence.encode('utf8')
 
+# Get feature vector for 1best
+fsrc = forest.viterbi_features()
+
+# Feature expectations
+print 'Feature expectations:', dict(forest.inside_outside())
+
 # Reference lattice
 lattice = ((('australia',0,1),),(('is',0,1),),(('one',0,1),),(('of',0,1),),(('the',0,4),('a',0,4),('a',0,1),('the',0,1),),(('small',0,1),('tiny',0,1),('miniscule',0,1),('handful',0,2),),(('number',0,1),('group',0,1),),(('of',0,2),),(('few',0,1),),(('countries',0,1),),(('that',0,1),),(('has',0,1),('have',0,1),),(('diplomatic',0,1),),(('relations',0,1),),(('with',0,1),),(('north',0,1),),(('korea',0,1),),(('.',0,1),),)
 
@@ -44,6 +55,16 @@ lat = cdec.Lattice(lattice)
 assert (lattice == tuple(lat))
 
 # Intersect forest and lattice
-forest.intersect(lat)
+assert forest.intersect(lat)
+
 # Get best synchronous parse
-print forest.viterbi_tree()
+f_tree, e_tree = forest.viterbi_trees()
+print 'FTree:', f_tree.encode('utf8')
+print 'ETree:', e_tree.encode('utf8')
+
+# Compare 1best and reference feature vectors
+fref = forest.viterbi_features()
+print dict(fsrc - fref)
+
+# Prune hypergraph
+forest.prune(density=100)
author	Kenneth Heafield <github@kheafield.com>	2012-08-03 07:46:54 -0400
committer	Kenneth Heafield <github@kheafield.com>	2012-08-03 07:46:54 -0400
commit	be1ab0a8937f9c5668ea5e6c31b798e87672e55e (patch)
tree	a13aad60ab6cced213401bce6a38ac885ba171ba /python/test.py
parent	e5d6f4ae41009c26978ecd62668501af9762b0bc (diff)
parent	9fe0219562e5db25171cce8776381600ff9a5649 (diff)