From ef3c7beb9e58f22df5a778deed185d751dda0580 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 16 Jan 2014 00:25:14 -0500 Subject: moar hindi --- corpus/support/quote-norm.pl | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'corpus') diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl index 57f4ad77..7fe33db4 100755 --- a/corpus/support/quote-norm.pl +++ b/corpus/support/quote-norm.pl @@ -61,6 +61,11 @@ while() { s/«/"/g; s/»/"/g; tr/！-～/!-~/; + tr/०-९/0-9/; # devanagari + tr/౦-౯/0-9/; # telugu + tr/೦-೯/0-9/; # kannada + tr/௦-௯/0-9/; # tamil + tr/൦-൯/0-9/; # malayalam s/、/,/g; # s/。/./g; s/…/.../g; -- cgit v1.2.3