diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-06-03 16:58:29 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-06-03 16:58:29 -0400 |
commit | 0e2f8d3d049f06afb08b4639c6a28aa5461cdc78 (patch) | |
tree | 27d98e9faaf0bbfe24a501f8eecf04149c6cc8e9 /corpus/support | |
parent | 83ed21822dc65004097ab1049fb28c518b90506c (diff) |
fix for nonjoining chars
Diffstat (limited to 'corpus/support')
-rwxr-xr-x | corpus/support/quote-norm.pl | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl
index 0366fad5..3eee0666 100755
--- a/corpus/support/quote-norm.pl
+++ b/corpus/support/quote-norm.pl
@@ -40,6 +40,7 @@ while(<STDIN>) {
  # Regularlize spaces:
  s/\x{ad}//g; # soft hyphen
+ s/\x{200C}//g; # zero-width non-joiner
  s/\x{a0}/ /g; # non-breaking space
  s/\x{2009}/ /g; # thin space
  s/\x{2028}/ /g; # "line separator"