From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- gi/pipeline/scripts/remove-tags-from-contexts.pl | 53 ------------------------ 1 file changed, 53 deletions(-) delete mode 100755 gi/pipeline/scripts/remove-tags-from-contexts.pl (limited to 'gi/pipeline/scripts/remove-tags-from-contexts.pl') diff --git a/gi/pipeline/scripts/remove-tags-from-contexts.pl b/gi/pipeline/scripts/remove-tags-from-contexts.pl deleted file mode 100755 index 20698816..00000000 --- a/gi/pipeline/scripts/remove-tags-from-contexts.pl +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my $PHRASE = 'tok'; -my $CONTEXT = 'tag'; - -die "Usage: $0 [--phrase=tok|tag] [--context=tok|tag] < corpus" - unless &GetOptions('phrase=s' => \$PHRASE, 'context=s' => \$CONTEXT); - -my $lno = 0; -while(my $line = <>) { - $lno++; - chomp $line; - my @top = split /\t/, $line; - die unless (scalar @top == 2); - - my @pwords = split /\s+/, $top[0]; - foreach my $token (@pwords) { - #print $token . "\n"; - my @parts = split /_(?!.*_)/, $token; - die unless (scalar @parts == 2); - if ($PHRASE eq "tok") { - $token = $parts[0] - } elsif ($PHRASE eq "tag") { - $token = $parts[1] - } - } - - my @fields = split / \|\|\| /, $top[1]; - foreach my $i (0..((scalar @fields) / 2 - 1)) { - #print $i . ": " . $fields[2*$i] . " of " . (scalar @fields) . "\n"; - my @cwords = split /\s+/, $fields[2*$i]; - foreach my $token (@cwords) { - #print $i . ": " . $token . "\n"; - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - if ($CONTEXT eq "tok") { - $token = $parts[0] - } elsif ($CONTEXT eq "tag") { - $token = $parts[1] - } - } - } - $fields[2*$i] = join ' ', @cwords; - } - - print join ' ', @pwords; - print "\t"; - print join ' ||| ', @fields; - print "\n"; -} -- cgit v1.2.3