summary | refs | log | tree | commit | diff
path: root/sa-extract/escape-testset.pl
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2012-08-03 07:46:54 -0400
committer Kenneth Heafield <github@kheafield.com> 2012-08-03 07:46:54 -0400
commit 122f46c31102b683eaab3ad81a3a98accbc694bb (patch)
tree 8d499d789b159ebed25bb23b6983813d064a6296 /sa-extract/escape-testset.pl
parent ac664bdb0e481539cf77098a7dd0e1ec8d937ba0 (diff)
parent 193d137056c3c4f73d66f8db84691d63307de894 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'sa-extract/escape-testset.pl')
-rwxr-xr-x sa-extract/escape-testset.pl 35
1 files changed, 0 insertions, 35 deletions
diff --git a/sa-extract/escape-testset.pl b/sa-extract/escape-testset.pl
deleted file mode 100755
index 02fd7445..00000000
--- a/sa-extract/escape-testset.pl
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/perl -w
-
-use utf8;
-use strict;
-
-binmode(STDIN,":utf8");
-binmode(STDOUT,":utf8");
-
-my @fh = ();
-if (scalar @ARGV == 0) {
- push @fh, \*STDIN;
-} else {
- for my $file (@ARGV) {
- my $f;
- open $f, "<$file" or die "Can't read $file: $!\n";
- binmode $f, ":utf8";
- push @fh, $f;
- }
-}
-
-my $id = -1;
-for my $f (@fh) {
- while(<$f>) {
- chomp;
- die "Empty line in test set" if /^\s*$/;
- die "Please remove <seg> tags from input:\n$_" if /^\s*<seg/i;
- $id++;
- s/&/\&amp;/g;
- s/</\&lt;/g;
- s/>/\&gt;/g;
- print "<seg id=\"$id\"> $_ </seg>\n";
- }
-}
-
-