From b66e838ed52decc0be1eb5817b2a77c3840db2c5 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 3 Jun 2014 16:58:29 -0400 Subject: fix for nonjoining chars --- corpus/support/quote-norm.pl | 1 + 1 file changed, 1 insertion(+) (limited to 'corpus') diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl index 0366fad5..3eee0666 100755 --- a/corpus/support/quote-norm.pl +++ b/corpus/support/quote-norm.pl @@ -40,6 +40,7 @@ while(<STDIN>) { # Regularlize spaces: s/\x{ad}//g; # soft hyphen + s/\x{200C}//g; # zero-width non-joiner s/\x{a0}/ /g; # non-breaking space s/\x{2009}/ /g; # thin space s/\x{2028}/ /g; # "line separator" -- cgit v1.2.3