summary | refs | log | tree | commit | diff
path: root/corpus/cut-corpus.pl
diff options
context:
space:
mode:
Diffstat (limited to 'corpus/cut-corpus.pl')
-rwxr-xr-x corpus/cut-corpus.pl 16
1 files changed, 16 insertions, 0 deletions
diff --git a/corpus/cut-corpus.pl b/corpus/cut-corpus.pl
new file mode 100755
index 00000000..fc9cce3b
--- /dev/null
+++ b/corpus/cut-corpus.pl
@@ -0,0 +1,16 @@
+#!/usr/bin/perl -w
+# Print the Nth (1-based) ' ||| '-separated field of each input line; a line with fewer than N fields prints ''.
+use strict;
+die "Usage: $0 N\nSplits a corpus separated by ||| symbols and returns the Nth field\n" unless scalar @ARGV > 0;
+
+my $x = shift @ARGV;
+# N is 1-based, so reject 0 too: after the decrement below it would be -1 and select the last field.
+die "N must be a positive integer\n" unless $x =~ /^\d+$/ && $x > 0;
+$x--;    # convert the 1-based field number into a 0-based array index
+
+while (<>) {    # reads the files named in the remaining arguments, or STDIN when there are none
+    chomp;
+    my @fields = split / \|\|\| /;
+    print defined $fields[$x] ? "$fields[$x]\n" : "\n";
+}
+