summary | refs | log | tree | commit | diff
path: root/corpus/support/quote-norm.pl
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2012-12-05 20:27:30 -0500
committer: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2012-12-05 20:27:30 -0500
commit: 8bc9e5d9cfe634191c83bd735af4571525133cfe (patch)
tree: 9cedab6a92b0e8e7bac0a930f23ba0850b152aa7 /corpus/support/quote-norm.pl
parent: d7b60765395a4fe6da23992f45afd18dfc093aeb (diff)
remove logging, you should be using pv
Diffstat (limited to 'corpus/support/quote-norm.pl')
-rwxr-xr-x corpus/support/quote-norm.pl 7
1 file changed, 6 insertions, 1 deletion
diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl
index 0c5b9c26..72b0064d 100755
--- a/corpus/support/quote-norm.pl
+++ b/corpus/support/quote-norm.pl
@@ -18,13 +18,18 @@ while(<STDIN>) {
s/(\W)(euro?)(\d*\.\d+|\d+)/$1EUR $3/gi;
s/&\s*#45\s*;\s*&\s*#45\s*;/--/g;
s/&\s*#45\s*;/--/g;
+ s/�c/--/g;
s/ ,,/ "/g;
s/``/"/g;
s/''/"/g;
+ s/[「」]/"/g;
s/〃/"/g;
s/¨/"/g;
s/¡/ ¡ /g;
s/¿/ ¿ /g;
+ # â<U+0080><U+0099>
+ s/â(\x{80}\x{99}|\x{80}\x{98})/'/g;
+ s/â(\x{80}\x{9c}|\x{80}\x{9d})/"/g;
s/ˇ/'/g;
s/´/'/g;
s/`/'/g;
@@ -39,7 +44,7 @@ while(<STDIN>) {
s/»/"/g;
tr/!-~/!-~/;
s/、/,/g;
- s/。/./g;
+ # s/。/./g;
s/…/.../g;
s/―/--/g;
s/–/--/g;