summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2014-11-28 14:06:33 -0500
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2014-11-28 14:06:33 -0500
commit2d684c9e3f5478d1aec7374824f94a5d6a761d07 (patch)
treee7d336d41a208a3e86dfe64ac89ab82373991778
parent326ea26db4680219f801eeb6622dd2ee378e974b (diff)
conll example in cdec
-rw-r--r--tests/system_tests/conll/README8
-rw-r--r--tests/system_tests/conll/cdec.ini13
-rw-r--r--tests/system_tests/conll/gold.statistics20
-rw-r--r--tests/system_tests/conll/gold.stdout0
-rw-r--r--tests/system_tests/conll/input.conll13
-rw-r--r--tests/system_tests/conll/input.txt2
-rw-r--r--tests/system_tests/conll/tagset.txt1
-rw-r--r--tests/system_tests/conll/weights64
8 files changed, 121 insertions, 0 deletions
diff --git a/tests/system_tests/conll/README b/tests/system_tests/conll/README
new file mode 100644
index 00000000..261e6a05
--- /dev/null
+++ b/tests/system_tests/conll/README
@@ -0,0 +1,8 @@
+To generate the input file, run:
+
+ ~/cdec/corpus/conll2cdec.pl input.conll > input.txt
+
+This will create a training corpus (i.e., both the input and the
+gold standard output are present) in input.txt.
+
+See cdec.ini for examples of how to include features in the model.
diff --git a/tests/system_tests/conll/cdec.ini b/tests/system_tests/conll/cdec.ini
new file mode 100644
index 00000000..f214857a
--- /dev/null
+++ b/tests/system_tests/conll/cdec.ini
@@ -0,0 +1,13 @@
+formalism=tagger
+tagger_tagset=tagset.txt
+
+# grab the second feature column from the conll input (-w 2) and
+# create a feature of x_{i-1},x_i conjoined with y_i
+feature_function=CoNLLFeatures -w 2 -t xxy:%x[-1]_%x[0]:%y[0]
+
+# grab the first feature column from the conll input (-w 1) and
+# create a feature of x_i conjoined with y_i
+feature_function=CoNLLFeatures -w 1 -t xy:%x[0]:%y[0]
+
+intersection_strategy=full
+
diff --git a/tests/system_tests/conll/gold.statistics b/tests/system_tests/conll/gold.statistics
new file mode 100644
index 00000000..17366689
--- /dev/null
+++ b/tests/system_tests/conll/gold.statistics
@@ -0,0 +1,20 @@
+-lm_nodes 12
+-lm_edges 24
+-lm_paths 729
++lm_nodes 12
++lm_edges 24
++lm_paths 729
++lm_trans O O O B I O
+constr_nodes 12
+constr_edges 12
+constr_paths 1
+-lm_nodes 10
+-lm_edges 20
+-lm_paths 243
++lm_nodes 10
++lm_edges 20
++lm_paths 243
++lm_trans O B I I O
+constr_nodes 10
+constr_edges 10
+constr_paths 1
diff --git a/tests/system_tests/conll/gold.stdout b/tests/system_tests/conll/gold.stdout
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/tests/system_tests/conll/gold.stdout
diff --git a/tests/system_tests/conll/input.conll b/tests/system_tests/conll/input.conll
new file mode 100644
index 00000000..507510ca
--- /dev/null
+++ b/tests/system_tests/conll/input.conll
@@ -0,0 +1,13 @@
+the the DT O
+angry angry JJ O
+dog dog NN O
+bit bite VBD B
+me I PRN I
+. . . O
+
+i i PRN O
+ate eat VBD B
+some some DT I
+pie pie NN I
+. . . O
+
diff --git a/tests/system_tests/conll/input.txt b/tests/system_tests/conll/input.txt
new file mode 100644
index 00000000..6a1a0230
--- /dev/null
+++ b/tests/system_tests/conll/input.txt
@@ -0,0 +1,2 @@
+<seg id="0" feat1="the angry dog bite I ." feat2="DT JJ NN VBD PRN ."> the angry dog bit me . ||| O O O B I O </seg>
+<seg id="1" feat1="i eat some pie ." feat2="PRN VBD DT NN ."> i ate some pie . ||| O B I I O </seg>
diff --git a/tests/system_tests/conll/tagset.txt b/tests/system_tests/conll/tagset.txt
new file mode 100644
index 00000000..bd0e6b60
--- /dev/null
+++ b/tests/system_tests/conll/tagset.txt
@@ -0,0 +1 @@
+B I O
diff --git a/tests/system_tests/conll/weights b/tests/system_tests/conll/weights
new file mode 100644
index 00000000..de130cb6
--- /dev/null
+++ b/tests/system_tests/conll/weights
@@ -0,0 +1,64 @@
+# Objective = 7.63544 (eval count=5)
+xxy:<s>_DT:B -0.19295226006843877
+xy:the:B -0.19295226006843877
+xxy:<s>_DT:I -0.19295226006843877
+xy:the:I -0.19295226006843877
+xxy:<s>_DT:O 0.38590452013687793
+xy:the:O 0.38590452013687793
+xxy:DT_JJ:B -0.19295226006843877
+xy:angry:B -0.19295226006843877
+xxy:DT_JJ:I -0.19295226006843877
+xy:angry:I -0.19295226006843877
+xxy:DT_JJ:O 0.38590452013687793
+xy:angry:O 0.38590452013687793
+xxy:JJ_NN:B -0.19295226006843885
+xy:dog:B -0.19295226006843885
+xxy:JJ_NN:I -0.19295226006843885
+xy:dog:I -0.19295226006843885
+xxy:JJ_NN:O 0.38590452013687765
+xy:dog:O 0.38590452013687765
+xxy:NN_VBD:B 0.38590452013687765
+xy:bite:B 0.38590452013687765
+xxy:NN_VBD:I -0.19295226006843885
+xy:bite:I -0.19295226006843885
+xxy:NN_VBD:O -0.19295226006843885
+xy:bite:O -0.19295226006843885
+xxy:VBD_PRN:B -0.19295226006843885
+xy:I:B -0.19295226006843885
+xxy:VBD_PRN:I 0.38590452013687765
+xy:I:I 0.38590452013687765
+xxy:VBD_PRN:O -0.19295226006843885
+xy:I:O -0.19295226006843885
+xxy:PRN_.:B -0.16038191506717553
+xy:.:B -0.32076383013435106
+xxy:PRN_.:I -0.16038191506717553
+xy:.:I -0.32076383013435106
+xxy:PRN_.:O 0.32076383013435134
+xy:.:O 0.64152766026870267
+xxy:<s>_PRN:B -0.19295226006843871
+xy:i:B -0.19295226006843871
+xxy:<s>_PRN:I -0.19295226006843871
+xy:i:I -0.19295226006843871
+xxy:<s>_PRN:O 0.38590452013687804
+xy:i:O 0.38590452013687804
+xxy:PRN_VBD:B 0.38590452013687804
+xy:eat:B 0.38590452013687804
+xxy:PRN_VBD:I -0.19295226006843871
+xy:eat:I -0.19295226006843871
+xxy:PRN_VBD:O -0.19295226006843871
+xy:eat:O -0.19295226006843871
+xxy:VBD_DT:B -0.19295226006843877
+xy:some:B -0.19295226006843877
+xxy:VBD_DT:I 0.38590452013687798
+xy:some:I 0.38590452013687798
+xxy:VBD_DT:O -0.19295226006843877
+xy:some:O -0.19295226006843877
+xxy:DT_NN:B -0.19295226006843877
+xy:pie:B -0.19295226006843877
+xxy:DT_NN:I 0.38590452013687798
+xy:pie:I 0.38590452013687798
+xxy:DT_NN:O -0.19295226006843877
+xy:pie:O -0.19295226006843877
+xxy:NN_.:B -0.16038191506717553
+xxy:NN_.:I -0.16038191506717553
+xxy:NN_.:O 0.32076383013435134