diff options
author | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
---|---|---|
committer | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
commit | 3d8d656fa7911524e0e6885647173474524e0784 (patch) | |
tree | 81b1ee2fcb67980376d03f0aa48e42e53abff222 /corpus/cut-corpus.pl | |
parent | be7f57fdd484e063775d7abf083b9fa4c403b610 (diff) | |
parent | 96fedabebafe7a38a6d5928be8fff767e411d705 (diff) |
fixed conflicts
Diffstat (limited to 'corpus/cut-corpus.pl')
-rwxr-xr-x | corpus/cut-corpus.pl | 35 |
1 files changed, 35 insertions, 0 deletions
#!/usr/bin/perl
#
# corpus/cut-corpus.pl
#
# Select fields from a corpus whose fields are separated by "|||".
#
# Usage: cut-corpus.pl FIELDS [FILE...]
#
# FIELDS is a comma-separated list of 1-based field numbers and/or ranges
# (for example "2" or "1,3-5").  Input lines are read from the named files,
# or from STDIN when no file is given.  For every input line the selected
# fields are printed in the order requested, joined by " ||| ".  A requested
# field that does not exist on a line is printed as an empty string.
use strict;
use warnings;

# Convert a field specification such as "1,3-5" into a list of 0-based
# array indices (here: 0, 2, 3, 4).
#
# Dies when an item is neither a number nor a LOW-HIGH range, when a range
# is not strictly increasing, or when a field number is 0.  Field 0 must be
# rejected explicitly: it would otherwise become index -1, which Perl
# interprets as "last element", silently selecting the wrong field.
sub parse_field_spec {
    my ($spec) = @_;
    my @indices;
    for my $item (split /,/, $spec) {
        if ($item =~ /^\d+$/) {
            die "Bad input: $spec (field numbers start at 1)\n" if $item < 1;
            push @indices, $item - 1;
        }
        elsif ($item =~ /^(\d+)-(\d+)$/) {
            # Copy the captures right away; avoid the names $a/$b, which
            # are reserved for sort().
            my ($first, $last) = ($1, $2);
            die "$first-$last is a bad range in input: $spec\n"
                unless $first >= 1 && $last > $first;
            push @indices, ($first - 1) .. ($last - 1);
        }
        else {
            die "Bad input: $spec\n";
        }
    }
    return @indices;
}

# Split one (already chomped) line on "|||", dropping the whitespace around
# each delimiter, and return the fields at the given 0-based indices joined
# by " ||| ".  Indices beyond the end of the line yield empty fields.
sub cut_line {
    my ($line, @indices) = @_;
    my @fields = split /\s*\|\|\|\s*/, $line;
    my @selected;
    for my $i (@indices) {
        my $field = $fields[$i];
        push @selected, defined($field) ? $field : '';
    }
    return join(' ||| ', @selected);
}

# Command-line entry point: the first argument is the field specification,
# any remaining arguments are input files consumed through <>.
sub main {
    die "Usage: $0 FIELDS [FILE...]\n"
      . "Splits a corpus separated by ||| symbols and returns the selected fields.\n"
      . "FIELDS is a comma-separated list of 1-based field numbers and ranges, e.g. 1,3-5\n"
      unless scalar @ARGV > 0;

    my $spec    = shift @ARGV;
    my @indices = parse_field_spec($spec);

    while (my $line = <>) {
        chomp $line;
        print cut_line($line, @indices) . "\n";
    }
    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# tested without consuming STDIN.
main() unless caller;

1;