summaryrefslogtreecommitdiff
path: root/corpus/cut-corpus.pl
blob: fc9cce3b89ae614bd7c442eac3d777c576108f5c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!/usr/bin/perl -w
use strict;
# Print the Nth " ||| "-separated field of every input line.
#
# Usage: cut-corpus.pl N [FILE ...]
#   N     1-based index of the field to extract.
#   FILE  optional input files; lines are read with <>, so standard input
#         is used when no file is named.
#
# A line with fewer than N fields yields an empty output line, so the
# output always contains exactly one line per input line.
die "Usage: $0 N\nSplits a corpus separated by ||| symbols and returns the Nth field\n" unless scalar @ARGV > 0;

my $x = shift @ARGV;
# Fields are numbered from 1. N = 0 has to be rejected explicitly: after the
# decrement below it would become index -1, which Perl resolves to the LAST
# element of the array instead of "no such field".
die "N must be a positive integer\n" unless $x =~ /^\d+$/ && $x > 0;
$x--;    # convert the 1-based field number to a 0-based array index

while (my $line = <>) {
  chomp $line;
  # The delimiter is the literal three-bar token with one space on each side.
  my @fields = split / \|\|\| /, $line;
  my $y = $fields[$x];
  # Missing field (short line): emit an empty line rather than warn on undef.
  $y = '' unless defined $y;
  print "$y\n";
}