From 414e902ea252a77cd7d4f48132d3bd194e507cfd Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 28 Nov 2014 14:06:33 -0500 Subject: conll example in cdec --- tests/system_tests/conll/README | 8 ++++ tests/system_tests/conll/cdec.ini | 13 +++++++ tests/system_tests/conll/gold.statistics | 20 ++++++++++ tests/system_tests/conll/gold.stdout | 0 tests/system_tests/conll/input.conll | 13 +++++++ tests/system_tests/conll/input.txt | 2 + tests/system_tests/conll/tagset.txt | 1 + tests/system_tests/conll/weights | 64 ++++++++++++++++++++++++++++++++ 8 files changed, 121 insertions(+) create mode 100644 tests/system_tests/conll/README create mode 100644 tests/system_tests/conll/cdec.ini create mode 100644 tests/system_tests/conll/gold.statistics create mode 100644 tests/system_tests/conll/gold.stdout create mode 100644 tests/system_tests/conll/input.conll create mode 100644 tests/system_tests/conll/input.txt create mode 100644 tests/system_tests/conll/tagset.txt create mode 100644 tests/system_tests/conll/weights diff --git a/tests/system_tests/conll/README b/tests/system_tests/conll/README new file mode 100644 index 00000000..261e6a05 --- /dev/null +++ b/tests/system_tests/conll/README @@ -0,0 +1,8 @@ +To generate the input file, run: + + ~/cdec/corpus/conll2cdec.pl input.conll > input.txt + +This will create a training corpus (i.e., each input is paired with its +gold standard output) in input.txt. + +See cdec.ini for examples of how to include features in the model. 
diff --git a/tests/system_tests/conll/cdec.ini b/tests/system_tests/conll/cdec.ini new file mode 100644 index 00000000..f214857a --- /dev/null +++ b/tests/system_tests/conll/cdec.ini @@ -0,0 +1,13 @@ +formalism=tagger +tagger_tagset=tagset.txt + +# grab the second feature column from the conll input (-w 2) and +# create a feature of x_{i-1},x_i conjoined with y_i +feature_function=CoNLLFeatures -w 2 -t xxy:%x[-1]_%x[0]:%y[0] + +# grab the first feature column from the conll input (-w 1) and +# create a feature of x_i conjoined with y_i +feature_function=CoNLLFeatures -w 1 -t xy:%x[0]:%y[0] + +intersection_strategy=full + diff --git a/tests/system_tests/conll/gold.statistics b/tests/system_tests/conll/gold.statistics new file mode 100644 index 00000000..17366689 --- /dev/null +++ b/tests/system_tests/conll/gold.statistics @@ -0,0 +1,20 @@ +-lm_nodes 12 +-lm_edges 24 +-lm_paths 729 ++lm_nodes 12 ++lm_edges 24 ++lm_paths 729 ++lm_trans O O O B I O +constr_nodes 12 +constr_edges 12 +constr_paths 1 +-lm_nodes 10 +-lm_edges 20 +-lm_paths 243 ++lm_nodes 10 ++lm_edges 20 ++lm_paths 243 ++lm_trans O B I I O +constr_nodes 10 +constr_edges 10 +constr_paths 1 diff --git a/tests/system_tests/conll/gold.stdout b/tests/system_tests/conll/gold.stdout new file mode 100644 index 00000000..e69de29b diff --git a/tests/system_tests/conll/input.conll b/tests/system_tests/conll/input.conll new file mode 100644 index 00000000..507510ca --- /dev/null +++ b/tests/system_tests/conll/input.conll @@ -0,0 +1,13 @@ +the the DT O +angry angry JJ O +dog dog NN O +bit bite VBD B +me I PRN I +. . . O + +i i PRN O +ate eat VBD B +some some DT I +pie pie NN I +. . . O + diff --git a/tests/system_tests/conll/input.txt b/tests/system_tests/conll/input.txt new file mode 100644 index 00000000..6a1a0230 --- /dev/null +++ b/tests/system_tests/conll/input.txt @@ -0,0 +1,2 @@ + the angry dog bit me . ||| O O O B I O + i ate some pie . 
||| O B I I O diff --git a/tests/system_tests/conll/tagset.txt b/tests/system_tests/conll/tagset.txt new file mode 100644 index 00000000..bd0e6b60 --- /dev/null +++ b/tests/system_tests/conll/tagset.txt @@ -0,0 +1 @@ +B I O diff --git a/tests/system_tests/conll/weights b/tests/system_tests/conll/weights new file mode 100644 index 00000000..de130cb6 --- /dev/null +++ b/tests/system_tests/conll/weights @@ -0,0 +1,64 @@ +# Objective = 7.63544 (eval count=5) +xxy:_DT:B -0.19295226006843877 +xy:the:B -0.19295226006843877 +xxy:_DT:I -0.19295226006843877 +xy:the:I -0.19295226006843877 +xxy:_DT:O 0.38590452013687793 +xy:the:O 0.38590452013687793 +xxy:DT_JJ:B -0.19295226006843877 +xy:angry:B -0.19295226006843877 +xxy:DT_JJ:I -0.19295226006843877 +xy:angry:I -0.19295226006843877 +xxy:DT_JJ:O 0.38590452013687793 +xy:angry:O 0.38590452013687793 +xxy:JJ_NN:B -0.19295226006843885 +xy:dog:B -0.19295226006843885 +xxy:JJ_NN:I -0.19295226006843885 +xy:dog:I -0.19295226006843885 +xxy:JJ_NN:O 0.38590452013687765 +xy:dog:O 0.38590452013687765 +xxy:NN_VBD:B 0.38590452013687765 +xy:bite:B 0.38590452013687765 +xxy:NN_VBD:I -0.19295226006843885 +xy:bite:I -0.19295226006843885 +xxy:NN_VBD:O -0.19295226006843885 +xy:bite:O -0.19295226006843885 +xxy:VBD_PRN:B -0.19295226006843885 +xy:I:B -0.19295226006843885 +xxy:VBD_PRN:I 0.38590452013687765 +xy:I:I 0.38590452013687765 +xxy:VBD_PRN:O -0.19295226006843885 +xy:I:O -0.19295226006843885 +xxy:PRN_.:B -0.16038191506717553 +xy:.:B -0.32076383013435106 +xxy:PRN_.:I -0.16038191506717553 +xy:.:I -0.32076383013435106 +xxy:PRN_.:O 0.32076383013435134 +xy:.:O 0.64152766026870267 +xxy:_PRN:B -0.19295226006843871 +xy:i:B -0.19295226006843871 +xxy:_PRN:I -0.19295226006843871 +xy:i:I -0.19295226006843871 +xxy:_PRN:O 0.38590452013687804 +xy:i:O 0.38590452013687804 +xxy:PRN_VBD:B 0.38590452013687804 +xy:eat:B 0.38590452013687804 +xxy:PRN_VBD:I -0.19295226006843871 +xy:eat:I -0.19295226006843871 +xxy:PRN_VBD:O -0.19295226006843871 +xy:eat:O -0.19295226006843871 
+xxy:VBD_DT:B -0.19295226006843877 +xy:some:B -0.19295226006843877 +xxy:VBD_DT:I 0.38590452013687798 +xy:some:I 0.38590452013687798 +xxy:VBD_DT:O -0.19295226006843877 +xy:some:O -0.19295226006843877 +xxy:DT_NN:B -0.19295226006843877 +xy:pie:B -0.19295226006843877 +xxy:DT_NN:I 0.38590452013687798 +xy:pie:I 0.38590452013687798 +xxy:DT_NN:O -0.19295226006843877 +xy:pie:O -0.19295226006843877 +xxy:NN_.:B -0.16038191506717553 +xxy:NN_.:I -0.16038191506717553 +xxy:NN_.:O 0.32076383013435134 -- cgit v1.2.3