diff options
-rwxr-xr-x | hg2json.py | 81 | ||||
-rw-r--r-- | test/hg2json/cdec.ini | 5 | ||||
-rw-r--r-- | test/hg2json/grammar.gz | bin | 0 -> 1399915 bytes | |||
-rw-r--r-- | test/hg2json/hg.json.gz | bin | 0 -> 318029 bytes | |||
-rw-r--r-- | test/hg2json/hg.meta | 7 | ||||
-rw-r--r-- | test/hg2json/in | 1 | ||||
-rw-r--r-- | test/hg2json/toy.cdec.ini | 2 | ||||
-rw-r--r-- | test/hg2json/toy.grammar | 12 | ||||
-rw-r--r-- | test/hg2json/toy.in | 1 | ||||
-rw-r--r-- | test/hg2json/toy.weights | 3 | ||||
-rw-r--r-- | test/hg2json/weights | 17 |
11 files changed, 129 insertions, 0 deletions
diff --git a/hg2json.py b/hg2json.py new file mode 100755 index 0000000..5bd5c2c --- /dev/null +++ b/hg2json.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python2 + +import cdec +import sys, argparse + +def hg2json(hg, weights): + """ + output a JSON representation of a cdec hypegraph + (see http://aclweb.org/aclwiki/index.php?title=Hypergraph_Format ) + """ + res = '' + res += "{\n" + res += '"weights":{'+"\n" + a = [] + for i in weights: + a.append( '"%s":%s'%(i[0], i[1]) ) + res += ", ".join(a)+"\n" + res += "},\n" + res += '"nodes":'+"\n" + res += "[\n" + a = [] + a.append( '{ "label":"root", "cat":"root" }' ) + for i in hg.nodes: + a.append( '{ "label":"%s", "cat":"%s" }'%(i.id, i.cat) ) + res += ",\n".join(a)+"\n" + res += "],\n" + res += '"edges":'+"\n" + res += "[\n" + a = [] + for i in hg.edges: + s = "{" + s += '"head":"%s"'%(i.head_node.id) + xs = ' "f":{' + b = [] + for j in i.feature_values: + b.append( '"%s":%s'%(j[0], j[1]) ) + xs += ", ".join(b) + xs += "}," + c = [] + for j in i.tail_nodes: + c.append( '"'+str(j.id)+'"' ) + if len(c) > 0: + s += ', "tails":[ %s ],'%(",".join(c)) + else: + s += ', "tails":[ "root" ],' + s += xs + s += ' "weight":%s }'%(i.prob) + a.append(s) + res += ",\n".join(a)+"\n" + res += "]\n" + res += "}\n" + return res + +def main(): + parser = argparse.ArgumentParser(description='get a proper json representation of cdec hypergraphs') + parser.add_argument('-c', '--config', required=True, help='decoder configuration') + parser.add_argument('-w', '--weights', required=True, help='feature weights') + args = parser.parse_args() + with open(args.config) as config: + config = config.read() + decoder = cdec.Decoder(config) + decoder.read_weights(args.weights) + ins = sys.stdin.readline().strip() + hg = decoder.translate(ins) + + sys.stderr.write( "input:\n '%s'\n"%(ins) ) + sys.stderr.write( "viterbi translation:\n '%s'\n"%(hg.viterbi()) ) + num_nodes = 0 + for i in hg.nodes: num_nodes+=1 + sys.stderr.write( "# nodes = %s\n"%(num_nodes) ) + 
num_edges = 0 + for i in hg.edges: num_edges+=1 + sys.stderr.write( "# edges = %s\n"%(num_edges) ) + sys.stderr.write( "viterbi score = %s\n"%(round(hg.viterbi_features().dot(decoder.weights), 2)) ) + + print hg2json(hg, decoder.weights) + + +if __name__=="__main__": + main() + diff --git a/test/hg2json/cdec.ini b/test/hg2json/cdec.ini new file mode 100644 index 0000000..1ad25b5 --- /dev/null +++ b/test/hg2json/cdec.ini @@ -0,0 +1,5 @@ +formalism=scfg +grammar=test/hg2json/grammar.gz +add_pass_through_rules=true +feature_function=WordPenalty +intersection_strategy=full diff --git a/test/hg2json/grammar.gz b/test/hg2json/grammar.gz Binary files differ new file mode 100644 index 0000000..78dda98 --- /dev/null +++ b/test/hg2json/grammar.gz diff --git a/test/hg2json/hg.json.gz b/test/hg2json/hg.json.gz Binary files differ new file mode 100644 index 0000000..ed178c6 --- /dev/null +++ b/test/hg2json/hg.json.gz diff --git a/test/hg2json/hg.meta b/test/hg2json/hg.meta new file mode 100644 index 0000000..d33a54c --- /dev/null +++ b/test/hg2json/hg.meta @@ -0,0 +1,7 @@ +input: + 'in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen .' +viterbi translation: + 'which labor market desperate transformed into attempting gathered by failed to show any the non - is making festzuhalten gathered by pervez musharraf meant to its borders with within than the non - have pakistan 's intelligence relied constitutional for security as a its borders with declared a state of emergency - range missiles .' +# nodes = 220 +# edges = 16640 +viterbi score = 228.95 diff --git a/test/hg2json/in b/test/hg2json/in new file mode 100644 index 0000000..7dc411d --- /dev/null +++ b/test/hg2json/in @@ -0,0 +1 @@ +in dem verzweifelten versuch , an der macht festzuhalten , hat pervez musharraf den rahmen der pakistanischen verfassung verlassen und den notstand ausgerufen . 
diff --git a/test/hg2json/toy.cdec.ini b/test/hg2json/toy.cdec.ini new file mode 100644 index 0000000..d4a2896 --- /dev/null +++ b/test/hg2json/toy.cdec.ini @@ -0,0 +1,2 @@ +formalism=scfg +grammar=test/hg2json/toy.grammar diff --git a/test/hg2json/toy.grammar b/test/hg2json/toy.grammar new file mode 100644 index 0000000..382c94f --- /dev/null +++ b/test/hg2json/toy.grammar @@ -0,0 +1,12 @@ +[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 +[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0 +[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0 +[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 +[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 +[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0 +[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0 +[JJ] ||| grosses ||| big ||| logp=0 +[JJ] ||| grosses ||| large ||| logp=0 +[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 +[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0 +[V] ||| fand ||| found ||| logp=0 diff --git a/test/hg2json/toy.in b/test/hg2json/toy.in new file mode 100644 index 0000000..e6df927 --- /dev/null +++ b/test/hg2json/toy.in @@ -0,0 +1 @@ +ich sah ein kleines haus diff --git a/test/hg2json/toy.weights b/test/hg2json/toy.weights new file mode 100644 index 0000000..70075b7 --- /dev/null +++ b/test/hg2json/toy.weights @@ -0,0 +1,3 @@ +logp 2 +use_house 0 +use_shell 1 diff --git a/test/hg2json/weights b/test/hg2json/weights new file mode 100644 index 0000000..7f96f1d --- /dev/null +++ b/test/hg2json/weights @@ -0,0 +1,17 @@ +PhraseModel_0 1.0 +PhraseModel_1 1.0 +PhraseModel_2 1.0 +PhraseModel_3 1.0 +PhraseModel_4 1.0 +PhraseModel_5 1.0 +PhraseModel_6 1.0 +PassThrough -1.0 +PassThrough_1 -1.0 +PassThrough_2 -1.0 +PassThrough_3 -1.0 +PassThrough_4 -1.0 +PassThrough_5 -1.0 +PassThrough_6 -1.0 +Glue 0.1 +LanguageModel 10.0 +LanguageModel_OOV -10 |