summaryrefslogtreecommitdiff
path: root/corpus/support/fix-eos.pl
blob: 584f8b4619d7eaa6a84a1632fcb6f3cc7b6a5dc4 (plain)
1
2
3
4
5
6
7
8
9
10
#!/usr/bin/perl -w
use strict;
use utf8;

# Decode input and encode output as strict UTF-8.  The bare ":utf8" layer
# does not validate incoming bytes, so malformed input could produce corrupt
# strings that the \p{Devanagari} matching below then operates on;
# ":encoding(UTF-8)" checks the data as it is read.
binmode(STDIN, ":encoding(UTF-8)") or die "Cannot set UTF-8 layer on STDIN: $!";
binmode(STDOUT, ":encoding(UTF-8)") or die "Cannot set UTF-8 layer on STDOUT: $!";

# fix_eos($line) -> $line
#
# Returns a copy of $line in which a free-standing ASCII full stop that
# closes a stretch of Devanagari text is replaced by the danda (U+0964).
#
# The full stop is only rewritten when all of the following hold:
#   * it is preceded by whitespace (that whitespace run is collapsed to a
#     single space in the result);
#   * the text before it starts with two Devanagari characters followed by
#     one or more of: ASCII letters, digits, "!", space, ",", ".", "@", or
#     further Devanagari characters;
#   * it is followed either by optional whitespace up to the end of the
#     line, or by whitespace and a "|||" marker (presumably a field
#     separator in the corpus -- confirm against the data format).
#
# There is no /g flag, so at most the first such full stop per line is
# changed.  Lines that do not match are returned unmodified.
sub fix_eos {
  my ($line) = @_;
  $line =~ s/(\p{Devanagari}{2}[A-Za-z0-9! ,.\@\p{Devanagari}]+?)\s+(\.)(\s*$|\s+\|\|\|)/$1 \x{0964}$3/s;
  return $line;
}

# Filter STDIN to STDOUT one line at a time.
while (my $line = <STDIN>) {
  print fix_eos($line);
}