diff options
Diffstat (limited to 'extools/test_data')
-rw-r--r-- | extools/test_data/README | 10 | ||||
-rw-r--r-- | extools/test_data/corpus.aligned | 5 | ||||
-rw-r--r-- | extools/test_data/corpus.en | 5 | ||||
-rw-r--r-- | extools/test_data/corpus.fr | 5 | ||||
-rw-r--r-- | extools/test_data/corpus.len_cats | 5 | ||||
-rw-r--r-- | extools/test_data/fr-en.al.len | 5 | ||||
-rwxr-xr-x | extools/test_data/make_len_cats.pl | 23 |
7 files changed, 0 insertions, 58 deletions
diff --git a/extools/test_data/README b/extools/test_data/README deleted file mode 100644 index e368cffc..00000000 --- a/extools/test_data/README +++ /dev/null @@ -1,10 +0,0 @@ -The following was used to create the test data. The real inputs -were corpus.fr, corpus.en, and corpus.aligned. The generated files -were corpus.len_cats and fr-en.al.len. - - - ./make_len_cats.pl corpus.en > corpus.len_cats - - ../merge_lines.pl corpus.fr corpus.en corpus.aligned corpus.len_cats > fr-en.al.len - - diff --git a/extools/test_data/corpus.aligned b/extools/test_data/corpus.aligned deleted file mode 100644 index aa09e9ab..00000000 --- a/extools/test_data/corpus.aligned +++ /dev/null @@ -1,5 +0,0 @@ -0-0 1-2 2-1 -0-0 1-1 -0-0 0-1 1-0 1-1 2-0 2-1 3-2 4-3 -0-0 -0-0 1-1 diff --git a/extools/test_data/corpus.en b/extools/test_data/corpus.en deleted file mode 100644 index 2d4751bf..00000000 --- a/extools/test_data/corpus.en +++ /dev/null @@ -1,5 +0,0 @@ -the blue house -the hat -there is a hat -cap -the cat diff --git a/extools/test_data/corpus.fr b/extools/test_data/corpus.fr deleted file mode 100644 index 75b5e127..00000000 --- a/extools/test_data/corpus.fr +++ /dev/null @@ -1,5 +0,0 @@ -la maison bleue -le chapeau -il y a un chapeau -chapeau -le chat diff --git a/extools/test_data/corpus.len_cats b/extools/test_data/corpus.len_cats deleted file mode 100644 index 18d321de..00000000 --- a/extools/test_data/corpus.len_cats +++ /dev/null @@ -1,5 +0,0 @@ -0-1:SHORT 0-2:SHORT 0-3:MID 1-2:SHORT 1-3:SHORT 2-3:SHORT -0-1:SHORT 0-2:SHORT 1-2:SHORT -0-1:SHORT 0-2:SHORT 0-3:MID 0-4:MID 1-2:SHORT 1-3:SHORT 1-4:MID 2-3:SHORT 2-4:SHORT 3-4:SHORT -0-1:SHORT -0-1:SHORT 0-2:SHORT 1-2:SHORT diff --git a/extools/test_data/fr-en.al.len b/extools/test_data/fr-en.al.len deleted file mode 100644 index 7ee6b85d..00000000 --- a/extools/test_data/fr-en.al.len +++ /dev/null @@ -1,5 +0,0 @@ -la maison bleue ||| the blue house ||| 0-0 1-2 2-1 ||| 0-1:SHORT 0-2:SHORT 0-3:MID 1-2:SHORT 1-3:SHORT 2-3:SHORT -le chapeau ||| the hat ||| 0-0 1-1 ||| 0-1:SHORT 0-2:SHORT 1-2:SHORT -il y a un chapeau ||| there is a hat ||| 0-0 0-1 1-0 1-1 2-0 2-1 3-2 4-3 ||| 0-1:SHORT 0-2:SHORT 0-3:MID 0-4:MID 1-2:SHORT 1-3:SHORT 1-4:MID 2-3:SHORT 2-4:SHORT 3-4:SHORT -chapeau ||| cap ||| 0-0 ||| 0-1:SHORT -le chat ||| the cat ||| 0-0 1-1 ||| 0-1:SHORT 0-2:SHORT 1-2:SHORT diff --git a/extools/test_data/make_len_cats.pl b/extools/test_data/make_len_cats.pl deleted file mode 100755 index 25ef75fa..00000000 --- a/extools/test_data/make_len_cats.pl +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $max_len = 15; -my @cat_names = qw( NULL SHORT SHORT MID MID MID LONG LONG LONG LONG LONG VLONG VLONG VLONG VLONG VLONG ); - -while(<>) { - chomp; - my @words = split /\s+/; - my $len = scalar @words; - my @spans; - for (my $i =0; $i < $len; $i++) { - for (my $k = 1; $k <= $max_len; $k++) { - my $j = $i + $k; - next if ($j > $len); - my $cat = $cat_names[$k]; - die unless $cat; - push @spans, "$i-$j:$cat"; - } - } - print "@spans\n"; -} - |