From 552793bbd50f634ea755b84d47ddcc6cd4f158f2 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 6 Nov 2012 00:02:38 -0500 Subject: add lowercase script --- corpus/lowercase.pl | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100755 corpus/lowercase.pl (limited to 'corpus/lowercase.pl') diff --git a/corpus/lowercase.pl b/corpus/lowercase.pl new file mode 100755 index 00000000..688e493b --- /dev/null +++ b/corpus/lowercase.pl @@ -0,0 +1,9 @@ +#!/usr/bin/perl -w +use strict; +binmode(STDIN,":utf8"); +binmode(STDOUT,":utf8"); +while(<>) { + $_ = lc $_; + print; +} + -- cgit v1.2.3 From 4f452c5bf5cd0ed3cb50d31012f93a50366b3aac Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 17 Mar 2013 23:26:24 -0400 Subject: fix possible utf8 bug --- corpus/lowercase.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'corpus/lowercase.pl') diff --git a/corpus/lowercase.pl b/corpus/lowercase.pl index 688e493b..9fd91dac 100755 --- a/corpus/lowercase.pl +++ b/corpus/lowercase.pl @@ -2,7 +2,7 @@ use strict; binmode(STDIN,":utf8"); binmode(STDOUT,":utf8"); -while(<>) { +while(<STDIN>) { $_ = lc $_; print; } -- cgit v1.2.3