blob: 153bc4546bada15ce7a6119af3e66de28cbdff75 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
#!/usr/bin/perl -w
use strict;
# ADDS SELF-TRANSLATIONS OF POORLY ATTESTED WORDS TO THE PARALLEL DATA
#
# Reads "source ||| target" sentence pairs from the files named on the
# command line (or from standard input) and echoes every line unchanged.
# Afterwards it appends COPIES lines of the form "word ||| word" for each
# source token that
#   * appeared as a token on both sides of at least one pair, and
#   * occurred at most MAX_SOURCE_COUNT times on the source side overall.
# The appended words are emitted in sorted order.
use constant MAX_SOURCE_COUNT => 4;    # more frequent words are left alone
use constant COPIES           => 3;    # lines emitted per selected word

my %source_count;       # source token => number of source-side occurrences
my %self_translated;    # source tokens also seen on their own target side

while (my $line = <>) {
    print $line;        # pass the original data through untouched
    chomp $line;

    # Only the first two " ||| "-separated fields are used; any further
    # fields on the line are ignored.
    my ($source, $target) = split / \|\|\| /, $line;
    die "Format error: $line\n" unless defined $source && defined $target;

    tally_pair(\%source_count, \%self_translated, $source, $target);
}

for my $word (poorly_attested(\%source_count, \%self_translated)) {
    print "$word ||| $word\n" x COPIES;
}

# Splits $text into whitespace-delimited tokens.
# The ' ' form of split skips leading whitespace, so no empty token is
# produced when a side starts with a space (split /\s+/ would yield one,
# which could end up being emitted as an empty self-translation).
sub tokenize {
    my ($text) = @_;
    return split ' ', $text;
}

# Updates the tallies for one sentence pair.
#   $source_count    - hashref: token => source-side occurrence count
#   $self_translated - hashref: set of tokens found on both sides of a pair
#   $source, $target - the two sides of the pair, as raw strings
# Returns nothing; both hashes are modified in place.
sub tally_pair {
    my ($source_count, $self_translated, $source, $target) = @_;

    # A per-pair lookup set replaces scanning the target once per source
    # token.
    my %in_target = map { $_ => 1 } tokenize($target);

    for my $word (tokenize($source)) {
        $source_count->{$word}++;
        $self_translated->{$word} = 1 if $in_target{$word};
    }
    return;
}

# Returns, in sorted order, the self-translated tokens whose source-side
# count does not exceed MAX_SOURCE_COUNT.
sub poorly_attested {
    my ($source_count, $self_translated) = @_;
    return grep { $source_count->{$_} <= MAX_SOURCE_COUNT }
           sort keys %$self_translated;
}
|