summaryrefslogtreecommitdiff
path: root/word-aligner/classify.pl
blob: 893c7b22e624e8ed247b47a65daa3ead0c033b27 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/perl -w
# Replace every token of a corpus with its word class.
#
# Usage: classify.pl classes.txt corpus.txt
#
#   classes.txt  one entry per line: a word followed by its class,
#                separated by whitespace (extra fields are ignored).
#   corpus.txt   whitespace-separated tokens, one sequence per line.
#
# For each corpus line the classes of its tokens are printed to STDOUT,
# joined by single spaces.  The script dies on a malformed class entry or
# on a corpus token that has no entry in the class file.
use strict;

die "Usage: $0 classes.txt corpus.txt" unless scalar @ARGV == 2;

my ($class, $text) = @ARGV;

# Three-argument open with lexical handles: the file names are taken
# literally, so a name starting with '>' or ending in '|' cannot change
# the open mode or spawn a command.
open my $class_fh, '<', $class or die "Can't read $class: $!";
open my $text_fh,  '<', $text  or die "Can't read $text: $!";

# word => class; a later entry for the same word overrides an earlier one.
my %dict = ();
my $cc = 0;   # number of class-file entries read
while (my $line = <$class_fh>) {
  chomp $line;
  # split ' ' skips leading whitespace, so an indented line still yields
  # (word, class) instead of ('', word).
  my ($word, $cat) = split ' ', $line;
  unless (defined $word && defined $cat) {
    die "Malformed entry in $class at line $.: '$line'";
  }
  $dict{$word} = $cat;
  $cc++;
}
close $class_fh;
print STDERR "Loaded classes for $cc words\n";

while (my $line = <$text_fh>) {
  chomp $line;
  my @cats = map {
    # Test for presence, not truth: a class named "0" is a valid class.
    exists $dict{$_} or die "Undefined class for $_";
    $dict{$_};
  } split ' ', $line;
  print "@cats\n";
}
close $text_fh;