summaryrefslogtreecommitdiff
path: root/example/cdec2json.py
diff options
context:
space:
mode:
Diffstat (limited to 'example/cdec2json.py')
-rwxr-xr-xexample/cdec2json.py100
1 files changed, 0 insertions, 100 deletions
diff --git a/example/cdec2json.py b/example/cdec2json.py
deleted file mode 100755
index cd2a846..0000000
--- a/example/cdec2json.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env python2
-
-import cdec
-import sys, argparse
-import json
-import gzip
-
-
-#FIXME new format
-# strings?
-# map?
-def hg2json(hg, weights):
- """
- output a JSON representation of a cdec hypegraph
- """
- res = ''
- res += "{\n"
- res += '"rules":[\n'
- rules = []
- for i in hg.edges:
- s = json.dumps(str(i.trule))
- try:
- rules.index(s)
- except:
- rules.append(s)
- res += ",\n".join(rules)
- res += "\n],\n"
- res += '"nodes":'+"\n"
- res += "[\n"
- a = []
- a.append( '{ "id":0, "symbol":"root", "span":[-1,-1] }' )
- for i in hg.nodes:
- a.append('{ "id":%d, "symbol":"%s", "span":[%d,%d] }'%(i.id+1, i.cat, i.span[0], i.span[1]))
- res += ",\n".join(a)+"\n"
- res += "],\n"
- res += '"edges":'+"\n"
- res += "[\n"
- a = []
- for i in hg.edges:
- s = "{"
- s += '"head":%d'%(i.head_node.id+1)
- s += ', "rule":%s'%(rules.index(json.dumps(str(i.trule))))
- # f
- #xs = ' "f":{'
- #b = []
- #for j in i.feature_values:
- # b.append( '"%s":%s'%(j[0], j[1]) )
- #xs += ", ".join(b)
- #xs += "},"
- # tails
- if len(list(i.tail_nodes)) > 0:
- s += ', "tails":[ %s ],'%(",".join([str(n.id+1) for n in i.tail_nodes]))
- else:
- s += ', "tails":[ 0 ],'
- #s += xs
- s += ' "score":%s }'%(i.prob)
- a.append(s)
- res += ",\n".join(a)+"\n"
- res += "]\n"
- res += "}\n"
- return res
-
-def main():
- parser = argparse.ArgumentParser(description='get a proper json representation of cdec hypergraphs')
- parser.add_argument('-c', '--config', required=True, help='decoder configuration')
- parser.add_argument('-w', '--weights', required=True, help='feature weights')
- parser.add_argument('-g', '--grammar', required=False, help='grammar')
- args = parser.parse_args()
- with open(args.config) as config:
- config = config.read()
- decoder = cdec.Decoder(config)
- decoder.read_weights(args.weights)
- ins = sys.stdin.readline().strip()
- if args.grammar:
- if args.grammar.split('.')[-1] == 'gz':
- with gzip.open(args.grammar) as grammar:
- grammar = grammar.read()
- else:
- with open(args.grammar) as grammar:
- grammar = grammar.read()
- hg = decoder.translate(ins, grammar=grammar)
- else:
- hg = decoder.translate(ins)
-
- sys.stderr.write( "input:\n '%s'\n"%(ins) )
- sys.stderr.write( "viterbi translation:\n '%s'\n"%(hg.viterbi()) )
- num_nodes = 0
- for i in hg.nodes: num_nodes+=1
- sys.stderr.write( "# nodes = %s\n"%(num_nodes) )
- num_edges = 0
- for i in hg.edges: num_edges+=1
- sys.stderr.write( "# edges = %s\n"%(num_edges) )
- sys.stderr.write( "viterbi score = %s\n"%(round(hg.viterbi_features().dot(decoder.weights), 2)) )
-
- print hg2json(hg, decoder.weights)
-
-
-if __name__=="__main__":
- main()
-