summaryrefslogtreecommitdiff
path: root/corpus/support/quote-norm.pl
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2014-01-17 04:07:22 -0500
committerChris Dyer <redpony@gmail.com>2014-01-17 04:07:22 -0500
commit1f89eb5d2f72cc52b5aeb6a22a2a1661924cd300 (patch)
tree313195d13f572db5cb2c6aae42f6933a1e3ffe2b /corpus/support/quote-norm.pl
parent8d7312c7ee413c636ded9aaace714fb519485c27 (diff)
parentef3c7beb9e58f22df5a778deed185d751dda0580 (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'corpus/support/quote-norm.pl')
-rwxr-xr-xcorpus/support/quote-norm.pl5
1 files changed, 5 insertions, 0 deletions
diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl
index 57f4ad77..7fe33db4 100755
--- a/corpus/support/quote-norm.pl
+++ b/corpus/support/quote-norm.pl
@@ -61,6 +61,11 @@ while(<STDIN>) {
s/«/"/g;
s/»/"/g;
tr/!-~/!-~/;
+ tr/०-९/0-9/; # devanagari
+ tr/౦-౯/0-9/; # telugu
+ tr/೦-೯/0-9/; # kannada
+ tr/௦-௯/0-9/; # tamil
+ tr/൦-൯/0-9/; # malayalam
s/、/,/g;
# s/。/./g;
s/…/.../g;