summaryrefslogtreecommitdiff
path: root/gi/pipeline/scripts/remove-tags-from-contexts.pl
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pipeline/scripts/remove-tags-from-contexts.pl')
-rwxr-xr-xgi/pipeline/scripts/remove-tags-from-contexts.pl53
1 files changed, 0 insertions, 53 deletions
diff --git a/gi/pipeline/scripts/remove-tags-from-contexts.pl b/gi/pipeline/scripts/remove-tags-from-contexts.pl
deleted file mode 100755
index 20698816..00000000
--- a/gi/pipeline/scripts/remove-tags-from-contexts.pl
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-use Getopt::Long "GetOptions";
-
-my $PHRASE = 'tok';
-my $CONTEXT = 'tag';
-
-die "Usage: $0 [--phrase=tok|tag] [--context=tok|tag] < corpus"
- unless &GetOptions('phrase=s' => \$PHRASE, 'context=s' => \$CONTEXT);
-
-my $lno = 0;
-while(my $line = <>) {
- $lno++;
- chomp $line;
- my @top = split /\t/, $line;
- die unless (scalar @top == 2);
-
- my @pwords = split /\s+/, $top[0];
- foreach my $token (@pwords) {
- #print $token . "\n";
- my @parts = split /_(?!.*_)/, $token;
- die unless (scalar @parts == 2);
- if ($PHRASE eq "tok") {
- $token = $parts[0]
- } elsif ($PHRASE eq "tag") {
- $token = $parts[1]
- }
- }
-
- my @fields = split / \|\|\| /, $top[1];
- foreach my $i (0..((scalar @fields) / 2 - 1)) {
- #print $i . ": " . $fields[2*$i] . " of " . (scalar @fields) . "\n";
- my @cwords = split /\s+/, $fields[2*$i];
- foreach my $token (@cwords) {
- #print $i . ": " . $token . "\n";
- my @parts = split /_(?!.*_)/, $token;
- if (scalar @parts == 2) {
- if ($CONTEXT eq "tok") {
- $token = $parts[0]
- } elsif ($CONTEXT eq "tag") {
- $token = $parts[1]
- }
- }
- }
- $fields[2*$i] = join ' ', @cwords;
- }
-
- print join ' ', @pwords;
- print "\t";
- print join ' ||| ', @fields;
- print "\n";
-}