diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-08-03 07:46:54 -0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-08-03 07:46:54 -0400 |
commit | be1ab0a8937f9c5668ea5e6c31b798e87672e55e (patch) | |
tree | a13aad60ab6cced213401bce6a38ac885ba171ba /python/test.py | |
parent | e5d6f4ae41009c26978ecd62668501af9762b0bc (diff) | |
parent | 9fe0219562e5db25171cce8776381600ff9a5649 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'python/test.py')
-rw-r--r-- | python/test.py | 37 |
1 files changed, 29 insertions, 8 deletions
```diff
diff --git a/python/test.py b/python/test.py
index 1542dd4f..eb9e6a95 100644
--- a/python/test.py
+++ b/python/test.py
@@ -2,12 +2,11 @@
 import cdec
 import gzip
 
-config = 'formalism=scfg'
 weights = '../tests/system_tests/australia/weights'
 grammar_file = '../tests/system_tests/australia/australia.scfg.gz'
 
 # Load decoder width configuration
-decoder = cdec.Decoder(config)
+decoder = cdec.Decoder(formalism='scfg')
 
 # Read weights
 decoder.read_weights(weights)
@@ -19,24 +18,36 @@ with gzip.open(grammar_file) as f:
 
 # Input sentence
 sentence = u'澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。'
-print 'Input:', sentence
+print ' Input:', sentence.encode('utf8')
 
 # Decode
 forest = decoder.translate(sentence, grammar=grammar)
 
 # Get viterbi translation
 print 'Output[0]:', forest.viterbi().encode('utf8')
-print ' Tree[0]:', forest.viterbi_tree().encode('utf8')
+f_tree, e_tree = forest.viterbi_trees()
+print ' FTree[0]:', f_tree.encode('utf8')
+print ' ETree[0]:', e_tree.encode('utf8')
+print 'LgProb[0]:', forest.viterbi_features().dot(decoder.weights)
 
 # Get k-best translations
-for i, (sentence, tree) in enumerate(zip(forest.kbest(5), forest.kbest_tree(5)), 1):
+kbest = zip(forest.kbest(5), forest.kbest_trees(5), forest.kbest_features(5))
+for i, (sentence, (f_tree, e_tree), features) in enumerate(kbest, 1):
     print 'Output[%d]:' % i, sentence.encode('utf8')
-    print ' Tree[%d]:' % i, tree.encode('utf8')
+    print ' FTree[%d]:' % i, f_tree.encode('utf8')
+    print ' ETree[%d]:' % i, e_tree.encode('utf8')
+    print ' FVect[%d]:' % i, dict(features)
 
 # Sample translations from the forest
 for sentence in forest.sample(5):
     print 'Sample:', sentence.encode('utf8')
 
+# Get feature vector for 1best
+fsrc = forest.viterbi_features()
+
+# Feature expectations
+print 'Feature expectations:', dict(forest.inside_outside())
+
 # Reference lattice
 lattice = ((('australia',0,1),),(('is',0,1),),(('one',0,1),),(('of',0,1),),(('the',0,4),('a',0,4),('a',0,1),('the',0,1),),(('small',0,1),('tiny',0,1),('miniscule',0,1),('handful',0,2),),(('number',0,1),('group',0,1),),(('of',0,2),),(('few',0,1),),(('countries',0,1),),(('that',0,1),),(('has',0,1),('have',0,1),),(('diplomatic',0,1),),(('relations',0,1),),(('with',0,1),),(('north',0,1),),(('korea',0,1),),(('.',0,1),),)
 
@@ -44,6 +55,16 @@ lat = cdec.Lattice(lattice)
 assert (lattice == tuple(lat))
 
 # Intersect forest and lattice
-forest.intersect(lat)
+assert forest.intersect(lat)
+
 # Get best synchronous parse
-print forest.viterbi_tree()
+f_tree, e_tree = forest.viterbi_trees()
+print 'FTree:', f_tree.encode('utf8')
+print 'ETree:', e_tree.encode('utf8')
+
+# Compare 1best and reference feature vectors
+fref = forest.viterbi_features()
+print dict(fsrc - fref)
+
+# Prune hypergraph
+forest.prune(density=100)
```