diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-08-03 07:46:54 -0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-08-03 07:46:54 -0400 |
commit | 122f46c31102b683eaab3ad81a3a98accbc694bb (patch) | |
tree | 8d499d789b159ebed25bb23b6983813d064a6296 /sa-extract/escape-testset.pl | |
parent | ac664bdb0e481539cf77098a7dd0e1ec8d937ba0 (diff) | |
parent | 193d137056c3c4f73d66f8db84691d63307de894 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'sa-extract/escape-testset.pl')
-rwxr-xr-x | sa-extract/escape-testset.pl | 35 |
1 files changed, 0 insertions, 35 deletions
```diff
diff --git a/sa-extract/escape-testset.pl b/sa-extract/escape-testset.pl
deleted file mode 100755
index 02fd7445..00000000
--- a/sa-extract/escape-testset.pl
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/perl -w
-
-use utf8;
-use strict;
-
-binmode(STDIN,":utf8");
-binmode(STDOUT,":utf8");
-
-my @fh = ();
-if (scalar @ARGV == 0) {
-  push @fh, \*STDIN;
-} else {
-  for my $file (@ARGV) {
-    my $f;
-    open $f, "<$file" or die "Can't read $file: $!\n";
-    binmode $f, ":utf8";
-    push @fh, $f;
-  }
-}
-
-my $id = -1;
-for my $f (@fh) {
-  while(<$f>) {
-    chomp;
-    die "Empty line in test set" if /^\s*$/;
-    die "Please remove <seg> tags from input:\n$_" if /^\s*<seg/i;
-    $id++;
-    s/&/\&amp;/g;
-    s/</\&lt;/g;
-    s/>/\&gt;/g;
-    print "<seg id=\"$id\"> $_ </seg>\n";
-  }
-}
-
-
```