summaryrefslogtreecommitdiff
path: root/corpus/add-self-translations.pl
blob: 153bc4546bada15ce7a6119af3e66de28cbdff75 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/perl -w
use strict;

# ADDS SELF-TRANSLATIONS OF POORLY ATTESTED WORDS TO THE PARALLEL DATA
#
# Input (files named on the command line, or STDIN when none are given):
# one sentence pair per line, written as "source ||| target".
#
# Output (STDOUT): every input line echoed unchanged, followed by extra
# "token ||| token" pairs. A token gets such pairs when it
#   (a) appeared on both the source and the target side of at least one
#       sentence pair, and
#   (b) occurred at most $MAX_SOURCE_COUNT times on the source side overall.
# Each qualifying token is emitted $COPIES times, in sorted token order.
#
# Dies with "Format error: ..." on a line without the " ||| " separator.

my $MAX_SOURCE_COUNT = 4;  # tokens seen more often than this are skipped
my $COPIES           = 3;  # how many times each self-translation is printed

my %df;   # source token -> number of occurrences on the source side
my %def;  # source token -> number of those occurrences whose sentence pair
          # also has the token on the target side (only the keys are used)

while (my $line = <>) {
  # Echo first, so the original corpus passes through untouched.
  print $line;
  chomp $line;

  my ($src, $tgt) = split / \|\|\| /, $line;
  die "Format error: $line\n" unless defined $src && defined $tgt;

  # split ' ' (awk mode) ignores leading whitespace; split /\s+/ would
  # produce an empty first token there, which could then be "attested" on
  # both sides and end up printed as a bogus " ||| " pair.
  my @src_tokens = split ' ', $src;

  # Set of target tokens for this pair: constant-time membership tests
  # instead of rescanning the target side for every source token.
  my %in_target = map { $_ => 1 } split ' ', $tgt;

  for my $tok (@src_tokens) {
    $df{$tok}++;
    $def{$tok}++ if $in_target{$tok};
  }
}

for my $tok (sort keys %def) {
  next if $df{$tok} > $MAX_SOURCE_COUNT;
  print "$tok ||| $tok\n" for 1 .. $COPIES;
}