From 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 5 Jul 2016 11:01:46 +0200 Subject: mv --- normalize_punctuation | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) delete mode 100755 normalize_punctuation (limited to 'normalize_punctuation') diff --git a/normalize_punctuation b/normalize_punctuation deleted file mode 100755 index 108de44..0000000 --- a/normalize_punctuation +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/perl -w -# adapted from the moses scripts - -use strict; - -my ($language) = @ARGV; - -while() { - s/\r//g; - # normalize unicode punctuation - s/„/\"/g; - s/“/\"/g; - s/”/\"/g; - s/–/-/g; - s/—/ - /g; s/ +/ /g; - s/´/\'/g; - s/([a-z])‘([a-z])/$1\'$2/gi; - s/([a-z])’([a-z])/$1\'$2/gi; - s/‘/\"/g; - s/‚/\"/g; - s/’/\"/g; - s/''/\"/g; - s/´´/\"/g; - s/…/.../g; - # French quotes - s/ « / \"/g; - s/« /\"/g; - s/«/\"/g; - s/ » /\" /g; - s/ »/\"/g; - s/»/\"/g; - # handle pseudo-spaces - s/ \%/\%/g; - s/nº /nº /g; - s/ :/:/g; - s/ ºC/ ºC/g; - s/ cm/ cm/g; - s/ \?/\?/g; - s/ \!/\!/g; - s/ ;/;/g; - s/, /, /g; s/ +/ /g; - - print STDERR $_ if //; - - print $_; -} -- cgit v1.2.3