summaryrefslogtreecommitdiff
path: root/extools/test_data
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-08-12 23:33:21 -0400
committerChris Dyer <cdyer@cs.cmu.edu>2012-08-12 23:33:21 -0400
commitda176941c1f481f14e93bd7d055cc29cac0ea8c8 (patch)
treec7ec8c0f75b386e6ca6d37da830e5a2e369b1cca /extools/test_data
parent4760209baa483403db3bcb9bf1a32ae87a7b576d (diff)
use new union api
Diffstat (limited to 'extools/test_data')
-rw-r--r--extools/test_data/README10
-rw-r--r--extools/test_data/corpus.aligned5
-rw-r--r--extools/test_data/corpus.en5
-rw-r--r--extools/test_data/corpus.fr5
-rw-r--r--extools/test_data/corpus.len_cats5
-rw-r--r--extools/test_data/fr-en.al.len5
-rwxr-xr-xextools/test_data/make_len_cats.pl23
7 files changed, 0 insertions, 58 deletions
diff --git a/extools/test_data/README b/extools/test_data/README
deleted file mode 100644
index e368cffc..00000000
--- a/extools/test_data/README
+++ /dev/null
@@ -1,10 +0,0 @@
-The following was used to create the test data. The real inputs
-were corpus.fr, corpus.en, and corpus.aligned. The generated files
-were corpus.len_cats and fr-en.al.len.
-
-
- ./make_len_cats.pl corpus.en > corpus.len_cats
-
- ../merge_lines.pl corpus.fr corpus.en corpus.aligned corpus.len_cats > fr-en.al.len
-
-
diff --git a/extools/test_data/corpus.aligned b/extools/test_data/corpus.aligned
deleted file mode 100644
index aa09e9ab..00000000
--- a/extools/test_data/corpus.aligned
+++ /dev/null
@@ -1,5 +0,0 @@
-0-0 1-2 2-1
-0-0 1-1
-0-0 0-1 1-0 1-1 2-0 2-1 3-2 4-3
-0-0
-0-0 1-1
diff --git a/extools/test_data/corpus.en b/extools/test_data/corpus.en
deleted file mode 100644
index 2d4751bf..00000000
--- a/extools/test_data/corpus.en
+++ /dev/null
@@ -1,5 +0,0 @@
-the blue house
-the hat
-there is a hat
-cap
-the cat
diff --git a/extools/test_data/corpus.fr b/extools/test_data/corpus.fr
deleted file mode 100644
index 75b5e127..00000000
--- a/extools/test_data/corpus.fr
+++ /dev/null
@@ -1,5 +0,0 @@
-la maison bleue
-le chapeau
-il y a un chapeau
-chapeau
-le chat
diff --git a/extools/test_data/corpus.len_cats b/extools/test_data/corpus.len_cats
deleted file mode 100644
index 18d321de..00000000
--- a/extools/test_data/corpus.len_cats
+++ /dev/null
@@ -1,5 +0,0 @@
-0-1:SHORT 0-2:SHORT 0-3:MID 1-2:SHORT 1-3:SHORT 2-3:SHORT
-0-1:SHORT 0-2:SHORT 1-2:SHORT
-0-1:SHORT 0-2:SHORT 0-3:MID 0-4:MID 1-2:SHORT 1-3:SHORT 1-4:MID 2-3:SHORT 2-4:SHORT 3-4:SHORT
-0-1:SHORT
-0-1:SHORT 0-2:SHORT 1-2:SHORT
diff --git a/extools/test_data/fr-en.al.len b/extools/test_data/fr-en.al.len
deleted file mode 100644
index 7ee6b85d..00000000
--- a/extools/test_data/fr-en.al.len
+++ /dev/null
@@ -1,5 +0,0 @@
-la maison bleue ||| the blue house ||| 0-0 1-2 2-1 ||| 0-1:SHORT 0-2:SHORT 0-3:MID 1-2:SHORT 1-3:SHORT 2-3:SHORT
-le chapeau ||| the hat ||| 0-0 1-1 ||| 0-1:SHORT 0-2:SHORT 1-2:SHORT
-il y a un chapeau ||| there is a hat ||| 0-0 0-1 1-0 1-1 2-0 2-1 3-2 4-3 ||| 0-1:SHORT 0-2:SHORT 0-3:MID 0-4:MID 1-2:SHORT 1-3:SHORT 1-4:MID 2-3:SHORT 2-4:SHORT 3-4:SHORT
-chapeau ||| cap ||| 0-0 ||| 0-1:SHORT
-le chat ||| the cat ||| 0-0 1-1 ||| 0-1:SHORT 0-2:SHORT 1-2:SHORT
diff --git a/extools/test_data/make_len_cats.pl b/extools/test_data/make_len_cats.pl
deleted file mode 100755
index 25ef75fa..00000000
--- a/extools/test_data/make_len_cats.pl
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $max_len = 15;
-my @cat_names = qw( NULL SHORT SHORT MID MID MID LONG LONG LONG LONG LONG VLONG VLONG VLONG VLONG VLONG );
-
-while(<>) {
- chomp;
- my @words = split /\s+/;
- my $len = scalar @words;
- my @spans;
- for (my $i =0; $i < $len; $i++) {
- for (my $k = 1; $k <= $max_len; $k++) {
- my $j = $i + $k;
- next if ($j > $len);
- my $cat = $cat_names[$k];
- die unless $cat;
- push @spans, "$i-$j:$cat";
- }
- }
- print "@spans\n";
-}
-