diff options
author | Patrick Simianer <p@simianer.de> | 2016-07-05 11:01:46 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2016-07-05 11:01:46 +0200 |
commit | 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 (patch) | |
tree | 5a06ee7de98640a39244b57bb369697176b44ebf /normalize_punctuation | |
parent | 69949dda35c3ea21d8e926e5f0a596a0a0f61c6a (diff) |
mv
Diffstat (limited to 'normalize_punctuation')
-rwxr-xr-x | normalize_punctuation | 46 |
1 files changed, 0 insertions, 46 deletions
diff --git a/normalize_punctuation b/normalize_punctuation deleted file mode 100755 index 108de44..0000000 --- a/normalize_punctuation +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/perl -w -# adapted from the moses scripts - -use strict; - -my ($language) = @ARGV; - -while(<STDIN>) { - s/\r//g; - # normalize unicode punctuation - s/„/\"/g; - s/“/\"/g; - s/”/\"/g; - s/–/-/g; - s/—/ - /g; s/ +/ /g; - s/´/\'/g; - s/([a-z])‘([a-z])/$1\'$2/gi; - s/([a-z])’([a-z])/$1\'$2/gi; - s/‘/\"/g; - s/‚/\"/g; - s/’/\"/g; - s/''/\"/g; - s/´´/\"/g; - s/…/.../g; - # French quotes - s/ « / \"/g; - s/« /\"/g; - s/«/\"/g; - s/ » /\" /g; - s/ »/\"/g; - s/»/\"/g; - # handle pseudo-spaces - s/ \%/\%/g; - s/nº /nº /g; - s/ :/:/g; - s/ ºC/ ºC/g; - s/ cm/ cm/g; - s/ \?/\?/g; - s/ \!/\!/g; - s/ ;/;/g; - s/, /, /g; s/ +/ /g; - - print STDERR $_ if //; - - print $_; -} |