diff options
author | Patrick Simianer <p@simianer.de> | 2014-07-15 18:19:00 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-07-15 18:19:00 +0200 |
commit | e207b43bf179277d53b3adc767372469bd8a2ad2 (patch) | |
tree | 6cfab61e60bba7515b2872448b4fc5f2b860d822 /util/cdec2json.py | |
parent | 567f2bd17c05d31cd8a9b9d351f9030cddf53cb7 (diff) |
before msgpack
Diffstat (limited to 'util/cdec2json.py')
-rwxr-xr-x | util/cdec2json.py | 19 |
1 files changed, 14 insertions, 5 deletions
```diff
diff --git a/util/cdec2json.py b/util/cdec2json.py
index 6cebd70..ac468ca 100755
--- a/util/cdec2json.py
+++ b/util/cdec2json.py
@@ -2,6 +2,9 @@
 import cdec
 import sys, argparse
 
+import json
+import gzip
+
 #FIXME new format
 def hg2json(hg, weights):
 
@@ -20,7 +23,7 @@ def hg2json(hg, weights):
   res += '"nodes":'+"\n"
   res += "[\n"
   a = []
-  a.append( '{ "id":-1, "cat":"root", "span":[-1,-1] }' )
+  a.append( '{ "id":0, "cat":"root", "span":[-1,-1] }' )
   for i in hg.nodes:
     a.append('{ "id":%d, "cat":"%s", "span":[%d,%d] }'%(i.id, i.cat, i.span[0], i.span[1]))
   res += ",\n".join(a)+"\n"
@@ -31,7 +34,7 @@ def hg2json(hg, weights):
   for i in hg.edges:
     s = "{"
     s += '"head":%d'%(i.head_node.id)
-    s += ', "rule":"%s"'%(i.trule)
+    s += ', "rule":%s'%(json.dumps(str(i.trule)))
     # f
     xs = ' "f":{'
     b = []
@@ -41,9 +44,9 @@ def hg2json(hg, weights):
     xs += "},"
     # tails
     if len(list(i.tail_nodes)) > 0:
-      s += ', "tails":[ %s ],'%(",".join([str(n.id) for n in i.tail_nodes]))
+      s += ', "tails":[ %s ],'%(",".join([str(n.id+1) for n in i.tail_nodes]))
     else:
-      s += ', "tails":[ -1 ],'
+      s += ', "tails":[ 0 ],'
     s += xs
     s += ' "weight":%s }'%(i.prob)
     a.append(s)
@@ -56,13 +59,19 @@ def main():
   parser = argparse.ArgumentParser(description='get a proper json representation of cdec hypergraphs')
   parser.add_argument('-c', '--config', required=True, help='decoder configuration')
   parser.add_argument('-w', '--weights', required=True, help='feature weights')
+  parser.add_argument('-g', '--grammar', required=False, help='grammar')
   args = parser.parse_args()
   with open(args.config) as config:
     config = config.read()
   decoder = cdec.Decoder(config)
   decoder.read_weights(args.weights)
   ins = sys.stdin.readline().strip()
-  hg = decoder.translate(ins)
+  if args.grammar:
+    with gzip.open(args.grammar) as grammar:
+      grammar = grammar.read()
+    hg = decoder.translate(ins, grammar=grammar)
+  else:
+    hg = decoder.translate(ins)
   sys.stderr.write( "input:\n '%s'\n"%(ins) )
   sys.stderr.write( "viterbi translation:\n '%s'\n"%(hg.viterbi()) )
 
```