summaryrefslogtreecommitdiff
path: root/corpus/support/fix-eos.pl
blob: fe03727b2cfd96c5a210e416fb26ca24a8ae2e91 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
#!/usr/bin/perl
#
# Stream filter: copies STDIN to STDOUT line by line, replacing a
# free-standing ASCII period that follows Devanagari text with the
# Devanagari danda (U+0964).  See fix_eos() below for the exact rule.

use strict;
use warnings;
use utf8;

# Flush STDOUT after every print so the filter can sit in a pipeline.
$| = 1;

# :encoding(UTF-8) validates the byte stream; the bare :utf8 layer only flags
# the data as UTF-8 and lets malformed sequences through to the regex engine.
binmode(STDIN,  ':encoding(UTF-8)');
binmode(STDOUT, ':encoding(UTF-8)');

while (my $line = <STDIN>) {
    print fix_eos($line);
}

# fix_eos($line) -> $line
#
# Returns a copy of $line in which the first ASCII "." that
#   * is preceded by whitespace,
#   * comes after at least two consecutive Devanagari characters followed by
#     one or more characters from a restricted set (ASCII letters and digits,
#     "!", space, ",", ".", "@", Devanagari), and
#   * is followed either by optional whitespace up to the end of the line or
#     by whitespace and "|||" (presumably a field separator -- confirm
#     against the corpus format),
# is replaced by U+0964.  The whitespace in front of the period is collapsed
# to one space; whatever followed the period is kept unchanged.  Only the
# first match per line is rewritten.  Lines that do not match are returned
# unchanged.
sub fix_eos {
    my ($line) = @_;

    $line =~ s{
        (                                      # capture 1: text kept before the period
            \p{Devanagari}{2}                  #   two consecutive Devanagari characters
            [A-Za-z0-9! ,.\@\p{Devanagari}]+?  #   shortest run of allowed characters
        )
        \s+ \.                                 # whitespace, then the period to replace
        (\s*$|\s+\|\|\|)                       # capture 2: line end, or whitespace and "|||"
    }{$1 \x{0964}$2}x;

    return $line;
}