diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-11-28 14:06:33 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-11-28 14:06:33 -0500 |
commit | 2d684c9e3f5478d1aec7374824f94a5d6a761d07 (patch) | |
tree | e7d336d41a208a3e86dfe64ac89ab82373991778 | |
parent | 326ea26db4680219f801eeb6622dd2ee378e974b (diff) |
conll example in cdec
-rw-r--r-- | tests/system_tests/conll/README | 8 | ||||
-rw-r--r-- | tests/system_tests/conll/cdec.ini | 13 | ||||
-rw-r--r-- | tests/system_tests/conll/gold.statistics | 20 | ||||
-rw-r--r-- | tests/system_tests/conll/gold.stdout | 0 | ||||
-rw-r--r-- | tests/system_tests/conll/input.conll | 13 | ||||
-rw-r--r-- | tests/system_tests/conll/input.txt | 2 | ||||
-rw-r--r-- | tests/system_tests/conll/tagset.txt | 1 | ||||
-rw-r--r-- | tests/system_tests/conll/weights | 64 |
8 files changed, 121 insertions, 0 deletions
diff --git a/tests/system_tests/conll/README b/tests/system_tests/conll/README new file mode 100644 index 00000000..261e6a05 --- /dev/null +++ b/tests/system_tests/conll/README @@ -0,0 +1,8 @@ +To generate the input file, run: + + ~/cdec/corpus/conll2cdec.pl input.conll > input.txt + +This will create a training corpus (i.e., both an input and a +gold-standard output are present) in input.txt. + +See cdec.ini for examples of how to include features in the model. diff --git a/tests/system_tests/conll/cdec.ini b/tests/system_tests/conll/cdec.ini new file mode 100644 index 00000000..f214857a --- /dev/null +++ b/tests/system_tests/conll/cdec.ini @@ -0,0 +1,13 @@ +formalism=tagger +tagger_tagset=tagset.txt + +# grab the second feature column from the conll input (-w 2) and +# create a feature of x_{i-1},x_i conjoined with y_i +feature_function=CoNLLFeatures -w 2 -t xxy:%x[-1]_%x[0]:%y[0] + +# grab the first feature column from the conll input (-w 1) and +# create a feature of x_i conjoined with y_i +feature_function=CoNLLFeatures -w 1 -t xy:%x[0]:%y[0] + +intersection_strategy=full + diff --git a/tests/system_tests/conll/gold.statistics b/tests/system_tests/conll/gold.statistics new file mode 100644 index 00000000..17366689 --- /dev/null +++ b/tests/system_tests/conll/gold.statistics @@ -0,0 +1,20 @@ +-lm_nodes 12 +-lm_edges 24 +-lm_paths 729 ++lm_nodes 12 ++lm_edges 24 ++lm_paths 729 ++lm_trans O O O B I O +constr_nodes 12 +constr_edges 12 +constr_paths 1 +-lm_nodes 10 +-lm_edges 20 +-lm_paths 243 ++lm_nodes 10 ++lm_edges 20 ++lm_paths 243 ++lm_trans O B I I O +constr_nodes 10 +constr_edges 10 +constr_paths 1 diff --git a/tests/system_tests/conll/gold.stdout b/tests/system_tests/conll/gold.stdout new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/tests/system_tests/conll/gold.stdout diff --git a/tests/system_tests/conll/input.conll b/tests/system_tests/conll/input.conll new file mode 100644 index 00000000..507510ca --- /dev/null +++ 
b/tests/system_tests/conll/input.conll @@ -0,0 +1,13 @@ +the the DT O +angry angry JJ O +dog dog NN O +bit bite VBD B +me I PRN I +. . . O + +i i PRN O +ate eat VBD B +some some DT I +pie pie NN I +. . . O + diff --git a/tests/system_tests/conll/input.txt b/tests/system_tests/conll/input.txt new file mode 100644 index 00000000..6a1a0230 --- /dev/null +++ b/tests/system_tests/conll/input.txt @@ -0,0 +1,2 @@ +<seg id="0" feat1="the angry dog bite I ." feat2="DT JJ NN VBD PRN ."> the angry dog bit me . ||| O O O B I O </seg> +<seg id="1" feat1="i eat some pie ." feat2="PRN VBD DT NN ."> i ate some pie . ||| O B I I O </seg> diff --git a/tests/system_tests/conll/tagset.txt b/tests/system_tests/conll/tagset.txt new file mode 100644 index 00000000..bd0e6b60 --- /dev/null +++ b/tests/system_tests/conll/tagset.txt @@ -0,0 +1 @@ +B I O diff --git a/tests/system_tests/conll/weights b/tests/system_tests/conll/weights new file mode 100644 index 00000000..de130cb6 --- /dev/null +++ b/tests/system_tests/conll/weights @@ -0,0 +1,64 @@ +# Objective = 7.63544 (eval count=5) +xxy:<s>_DT:B -0.19295226006843877 +xy:the:B -0.19295226006843877 +xxy:<s>_DT:I -0.19295226006843877 +xy:the:I -0.19295226006843877 +xxy:<s>_DT:O 0.38590452013687793 +xy:the:O 0.38590452013687793 +xxy:DT_JJ:B -0.19295226006843877 +xy:angry:B -0.19295226006843877 +xxy:DT_JJ:I -0.19295226006843877 +xy:angry:I -0.19295226006843877 +xxy:DT_JJ:O 0.38590452013687793 +xy:angry:O 0.38590452013687793 +xxy:JJ_NN:B -0.19295226006843885 +xy:dog:B -0.19295226006843885 +xxy:JJ_NN:I -0.19295226006843885 +xy:dog:I -0.19295226006843885 +xxy:JJ_NN:O 0.38590452013687765 +xy:dog:O 0.38590452013687765 +xxy:NN_VBD:B 0.38590452013687765 +xy:bite:B 0.38590452013687765 +xxy:NN_VBD:I -0.19295226006843885 +xy:bite:I -0.19295226006843885 +xxy:NN_VBD:O -0.19295226006843885 +xy:bite:O -0.19295226006843885 +xxy:VBD_PRN:B -0.19295226006843885 +xy:I:B -0.19295226006843885 +xxy:VBD_PRN:I 0.38590452013687765 +xy:I:I 0.38590452013687765 
+xxy:VBD_PRN:O -0.19295226006843885 +xy:I:O -0.19295226006843885 +xxy:PRN_.:B -0.16038191506717553 +xy:.:B -0.32076383013435106 +xxy:PRN_.:I -0.16038191506717553 +xy:.:I -0.32076383013435106 +xxy:PRN_.:O 0.32076383013435134 +xy:.:O 0.64152766026870267 +xxy:<s>_PRN:B -0.19295226006843871 +xy:i:B -0.19295226006843871 +xxy:<s>_PRN:I -0.19295226006843871 +xy:i:I -0.19295226006843871 +xxy:<s>_PRN:O 0.38590452013687804 +xy:i:O 0.38590452013687804 +xxy:PRN_VBD:B 0.38590452013687804 +xy:eat:B 0.38590452013687804 +xxy:PRN_VBD:I -0.19295226006843871 +xy:eat:I -0.19295226006843871 +xxy:PRN_VBD:O -0.19295226006843871 +xy:eat:O -0.19295226006843871 +xxy:VBD_DT:B -0.19295226006843877 +xy:some:B -0.19295226006843877 +xxy:VBD_DT:I 0.38590452013687798 +xy:some:I 0.38590452013687798 +xxy:VBD_DT:O -0.19295226006843877 +xy:some:O -0.19295226006843877 +xxy:DT_NN:B -0.19295226006843877 +xy:pie:B -0.19295226006843877 +xxy:DT_NN:I 0.38590452013687798 +xy:pie:I 0.38590452013687798 +xxy:DT_NN:O -0.19295226006843877 +xy:pie:O -0.19295226006843877 +xxy:NN_.:B -0.16038191506717553 +xxy:NN_.:I -0.16038191506717553 +xxy:NN_.:O 0.32076383013435134 |