blob: 723e78cbe6e00c561cb0bc0e806361f80acd9f0c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
#!/usr/bin/perl -w
#
# Detokenizer: reads whitespace-tokenized text from the files named on the
# command line (or from standard input) and prints each line with its tokens
# rejoined: contractions and closing punctuation are attached to the word on
# their left, opening brackets and currency signs to the word on their right,
# and hyphens, slashes and decimal points on both sides.
use strict;
use warnings;
use IO::Handle;

# Flush after every line so the script can be used inside an interactive pipe.
STDOUT->autoflush(1);

while (my $line = <>) {
    my $output = _detokenize_line($line);
    print "$output\n";
}

# Rejoin the whitespace-separated tokens of one line.
#
# Takes the line as a string (a trailing newline is ignored, because the line
# is split on whitespace) and returns the detokenized text without a trailing
# newline.  An empty or all-blank line yields the empty string.
sub _detokenize_line {
    my ($line) = @_;

    my @tokens     = split ' ', $line;
    my $output     = '';
    my $lspace     = 0;    # true when a space goes before the current token
    my $open_quote = 0;    # true while inside a double-quoted span (reset per line)

    for my $i (0 .. $#tokens) {
        my $token  = $tokens[$i];
        my $prev   = $i > 0        ? $tokens[ $i - 1 ] : '';
        my $next   = $i < $#tokens ? $tokens[ $i + 1 ] : '';
        my $rspace = 1;    # true when a space goes after the current token

        if ($token =~ /^(?:n't|'(?:s|m|re|ll|ve|d))$/) {
            # Contractions and possessives join to the left.
            $lspace = 0;
        }
        elsif ($token eq "'" && $prev =~ /s$/) {
            # Plural possessive: a bare apostrophe after a word ending in "s".
            $lspace = 0;
        }
        elsif ($token eq '-' && $prev =~ /[A-Za-z0-9]$/ && $next =~ /^[A-Za-z0-9]/) {
            # A hyphen only when it sits between two alphanumerics; otherwise
            # it is treated as a dash and keeps its surrounding spaces.
            $lspace = $rspace = 0;
        }
        elsif ($token eq '"') {
            # Quote marks alternate: an opening quote joins to the right,
            # a closing quote joins to the left.
            if ($open_quote) {
                $lspace = 0;
            }
            else {
                $rspace = 0;
            }
            $open_quote = !$open_quote;
        }
        elsif ($token eq '.' && $prev =~ /\d$/ && $next =~ /^\d/) {
            # A period is a decimal point when the previous token ends with a
            # digit and the next token *starts* with one.
            $lspace = $rspace = 0;
        }
        elsif ($token =~ /^[.,:;?!%)\]]$/) {
            # Left joiners.
            $lspace = 0;
        }
        elsif ($token =~ /^[\$(\[]$/) {
            # Right joiners: dollar sign, opening parenthesis, opening bracket.
            $rspace = 0;
        }
        elsif ($token eq '/') {
            # Joins on both sides.
            $lspace = $rspace = 0;
        }

        $output .= ' ' if $lspace;
        $output .= $token;

        # The right-hand spacing of this token becomes the default left-hand
        # spacing of the next one.
        $lspace = $rspace;
    }

    return $output;
}
|