summary refs log tree commit diff
path: root/sa-extract/escape-testset.pl
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2012-03-13 09:24:47 +0100
committer Patrick Simianer <p@simianer.de> 2012-03-13 09:24:47 +0100
commit ef6085e558e26c8819f1735425761103021b6470 (patch)
tree 5cf70e4c48c64d838e1326b5a505c8c4061bff4a /sa-extract/escape-testset.pl
parent 10a232656a0c882b3b955d2bcfac138ce11e8a2e (diff)
parent dfbc278c1057555fda9312291c8024049e00b7d8 (diff)
merge with upstream
Diffstat (limited to 'sa-extract/escape-testset.pl')
-rwxr-xr-x sa-extract/escape-testset.pl 35
1 files changed, 35 insertions, 0 deletions
diff --git a/sa-extract/escape-testset.pl b/sa-extract/escape-testset.pl
new file mode 100755
index 00000000..02fd7445
--- /dev/null
+++ b/sa-extract/escape-testset.pl
@@ -0,0 +1,35 @@
+#!/usr/bin/perl -w
+# Wraps each input line in <seg id="N"> ... </seg>, escaping &, < and >.
+use utf8;
+use strict;
+
+binmode(STDIN,":utf8");   # read stdin and write stdout through the :utf8 layer
+binmode(STDOUT,":utf8");
+
+my @fh = ();              # input handles: stdin, or one per file named in @ARGV
+if (scalar @ARGV == 0) {
+  push @fh, \*STDIN;
+} else {
+  for my $file (@ARGV) {
+    # Three-argument open: $file is never parsed for a mode, pipe or whitespace.
+    open my $f, '<', $file or die "Can't read $file: $!\n";
+    binmode $f, ":utf8";
+    push @fh, $f;
+  }
+}
+
+my $id = -1;              # incremented before use, so the first segment gets id 0
+for my $f (@fh) {         # ids keep counting across all inputs
+  while(<$f>) {
+    chomp;
+    die "Empty line in test set" if /^\s*$/;   # blank or whitespace-only line
+    die "Please remove <seg> tags from input:\n$_" if /^\s*<seg/i;
+    $id++;
+    s/&/\&amp;/g;         # must run first: the next two insert '&' themselves
+    s/</\&lt;/g;
+    s/>/\&gt;/g;
+    print "<seg id=\"$id\"> $_ </seg>\n";
+  }
+}
+
+