From 3a2fc36378337147a956e439db31baf91bfb95c8 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 3 Feb 2012 18:03:49 -0500 Subject: escaping tool for grammar extractor --- sa-extract/escape-testset.pl | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 sa-extract/escape-testset.pl (limited to 'sa-extract/escape-testset.pl') diff --git a/sa-extract/escape-testset.pl b/sa-extract/escape-testset.pl new file mode 100755 index 00000000..02fd7445 --- /dev/null +++ b/sa-extract/escape-testset.pl @@ -0,0 +1,35 @@ +#!/usr/bin/perl -w + +use utf8; +use strict; + +binmode(STDIN,":utf8"); +binmode(STDOUT,":utf8"); + +my @fh = (); +if (scalar @ARGV == 0) { + push @fh, \*STDIN; +} else { + for my $file (@ARGV) { + my $f; + open $f, "<$file" or die "Can't read $file: $!\n"; + binmode $f, ":utf8"; + push @fh, $f; + } +} + +my $id = -1; +for my $f (@fh) { + while(<$f>) { + chomp; + die "Empty line in test set" if /^\s*$/; + die "Please remove tags from input:\n$_" if /^\s*/\>/g; + print " $_ \n"; + } +} + + -- cgit v1.2.3