#!/usr/bin/perl -w use strict; use utf8; binmode(STDOUT, ":utf8"); while(<>) { chomp; my @words = split /\s+/; for my $w (@words) { $_ = $w; if ($w =~ /^__NTK__/o) { s/__NTK__//go; next if /^$/; print STDOUT "$_ "; next; } s/tR/\x{0679}/g; # retroflex t s/dR/\x{0688}/g; # retroflex d s/rR/\x{0691}/g; # retroflex r s/p/\x{067E}/g; # peh s/c/\x{0686}/g; # tcheh s/g/\x{06AF}/g; # geh (G=ghain) s/@/\x{06BE}/g; # heh doachashmee s/h'/\x{06c2}/g; # heh goal + hamza s/h/\x{06c1}/g; # heh goal s/J/\x{0698}/g; # zheh (rare, usually persian loan words) s/k/\x{06A9}/g; # k s/Y'/\x{06d3}/g; # yeh barree + hamza above (ligature) s/y/\x{06cc}/g; # same as ya' in arabic s/Y/\x{06d2}/g; # yeh barree s/N/\x{06BA}/g; # Ghunna s/\'/\x{0621}/g; s/\|/\x{0622}/g; s/\>/\x{0623}/g; s/\&/\x{0624}/g; s/\