blob: 893c7b22e624e8ed247b47a65daa3ead0c033b27 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
#!/usr/bin/perl -w
# Replace every token of a corpus with its word class.
#
# Usage: <script> classes.txt corpus.txt
#
#   classes.txt  one "word class" pair per line, separated by whitespace;
#                a later entry for the same word overrides an earlier one.
#   corpus.txt   whitespace-separated tokens, one sentence per line.
#
# Each corpus line is written to STDOUT with every token replaced by its
# class (classes joined by single spaces). The number of class lines read is
# reported on STDERR. The script dies on a malformed class line or on a
# corpus token that has no class.
use strict;

die "Usage: $0 classes.txt corpus.txt\n" unless @ARGV == 2;
my ($class, $text) = @ARGV;

# Three-argument open with lexical handles: the file name can never be
# interpreted as an open mode, and the handles are properly scoped.
open my $class_fh, '<', $class or die "Can't read $class: $!";
open my $text_fh,  '<', $text  or die "Can't read $text: $!";

my %dict = ();
my $cc = 0;    # counts class lines read (duplicate words are counted again)
while (my $line = <$class_fh>) {
    chomp $line;
    # split ' ' ignores leading whitespace, so an indented line does not
    # yield an empty first field that would be mistaken for the word.
    my ($word, $cat) = split ' ', $line;
    die "Malformed class line in $class: '$line'"
        unless defined $word && defined $cat;
    $dict{$word} = $cat;
    $cc++;
}
close $class_fh;
print STDERR "Loaded classes for $cc words\n";

while (my $line = <$text_fh>) {
    chomp $line;
    my @cats;
    for my $token (split ' ', $line) {
        # Test definedness, not truth: a class named "0" is a valid class.
        die "Undefined class for $token" unless defined $dict{$token};
        push @cats, $dict{$token};
    }
    print "@cats\n";
}
close $text_fh;
|