summary refs log tree commit diff
path: root/sa-extract/escape-testset.pl
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-07-27 17:31:00 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-07-27 17:31:00 -0400
commit b317e0efd2398d75d70e027bb1e2cf442e683981 (patch)
tree ec34aff0ce4e8fb9704d1cd2b7abf00cb9a25b9a /sa-extract/escape-testset.pl
parent b2a8bccb2bd713d9ec081cf3dad0162c2cb492d8 (diff)
remove old suffix array extractor (use the one in python/ instead)
Diffstat (limited to 'sa-extract/escape-testset.pl')
-rwxr-xr-x sa-extract/escape-testset.pl 35
1 files changed, 0 insertions, 35 deletions
diff --git a/sa-extract/escape-testset.pl b/sa-extract/escape-testset.pl
deleted file mode 100755
index 02fd7445..00000000
--- a/sa-extract/escape-testset.pl
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/perl -w
-
-use utf8;
-use strict;
-
-binmode(STDIN,":utf8");
-binmode(STDOUT,":utf8");
-
-my @fh = ();
-if (scalar @ARGV == 0) {
- push @fh, \*STDIN;
-} else {
- for my $file (@ARGV) {
- my $f;
- open $f, "<$file" or die "Can't read $file: $!\n";
- binmode $f, ":utf8";
- push @fh, $f;
- }
-}
-
-my $id = -1;
-for my $f (@fh) {
- while(<$f>) {
- chomp;
- die "Empty line in test set" if /^\s*$/;
- die "Please remove <seg> tags from input:\n$_" if /^\s*<seg/i;
- $id++;
- s/&/\&amp;/g;
- s/</\&lt;/g;
- s/>/\&gt;/g;
- print "<seg id=\"$id\"> $_ </seg>\n";
- }
-}
-
-