diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-07-27 17:31:00 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-07-27 17:31:00 -0400 |
commit | 733e1b1507d27d4f53055f740e8098f56215ab8f (patch) | |
tree | 002266f993275b9f1e28ae2f1153e1274183be68 /sa-extract/escape-testset.pl | |
parent | 8fdc3681fb7551e7faeff9f720102cdd417ba077 (diff) |
remove old suffix array extractor (use the one in python/ instead)
Diffstat (limited to 'sa-extract/escape-testset.pl')
-rwxr-xr-x | sa-extract/escape-testset.pl | 35 |
1 files changed, 0 insertions, 35 deletions
```diff
diff --git a/sa-extract/escape-testset.pl b/sa-extract/escape-testset.pl
deleted file mode 100755
index 02fd7445..00000000
--- a/sa-extract/escape-testset.pl
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/perl -w
-
-use utf8;
-use strict;
-
-binmode(STDIN,":utf8");
-binmode(STDOUT,":utf8");
-
-my @fh = ();
-if (scalar @ARGV == 0) {
-  push @fh, \*STDIN;
-} else {
-  for my $file (@ARGV) {
-    my $f;
-    open $f, "<$file" or die "Can't read $file: $!\n";
-    binmode $f, ":utf8";
-    push @fh, $f;
-  }
-}
-
-my $id = -1;
-for my $f (@fh) {
-  while(<$f>) {
-    chomp;
-    die "Empty line in test set" if /^\s*$/;
-    die "Please remove <seg> tags from input:\n$_" if /^\s*<seg/i;
-    $id++;
-    s/&/\&amp;/g;
-    s/</\&lt;/g;
-    s/>/\&gt;/g;
-    print "<seg id=\"$id\"> $_ </seg>\n";
-  }
-}
-
-
```