summaryrefslogtreecommitdiff
path: root/corpus/support/quote-norm.pl
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-06-12 13:56:42 +0200
committerPatrick Simianer <p@simianer.de>2014-06-12 13:56:42 +0200
commita39aa79b18347e22ef36ebc0da5a7eb220bcb23f (patch)
tree2c0f3009f8e381002bfeb82c0ea3bd0c41125761 /corpus/support/quote-norm.pl
parent62bd9a4bdcea606d6ff2031fa4b207ef20caac31 (diff)
parent0e2f8d3d049f06afb08b4639c6a28aa5461cdc78 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'corpus/support/quote-norm.pl')
-rwxr-xr-xcorpus/support/quote-norm.pl1
1 files changed, 1 insertions, 0 deletions
diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl
index 0366fad5..3eee0666 100755
--- a/corpus/support/quote-norm.pl
+++ b/corpus/support/quote-norm.pl
@@ -40,6 +40,7 @@ while(<STDIN>) {
# Regularize spaces:
s/\x{ad}//g; # soft hyphen
+ s/\x{200C}//g; # zero-width non-joiner
s/\x{a0}/ /g; # non-breaking space
s/\x{2009}/ /g; # thin space
s/\x{2028}/ /g; # "line separator"