summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- TODO 1
-rwxr-xr-x external/detokenizer.perl 3
-rwxr-xr-x external/truecase.perl 3
3 files changed, 4 insertions, 3 deletions
diff --git a/TODO b/TODO
index 591c3ef..e69de29 100644
--- a/TODO
+++ b/TODO
@@ -1 +0,0 @@
-fix upper case if first thing is a comma or .
diff --git a/external/detokenizer.perl b/external/detokenizer.perl
index a8de7e8..dc12609 100755
--- a/external/detokenizer.perl
+++ b/external/detokenizer.perl
@@ -192,7 +192,8 @@ sub detokenize {
#add trailing break
$text .= "\n" unless $text =~ /\n$/;
- $text =~ s/^([[:punct:]\s]*)([[:alpha:]])/ucsecondarg($1, $2)/e if $UPPERCASE_SENT;
+ #$text =~ s/^([[:punct:]\s]*)([[:alpha:]])/ucsecondarg($1, $2)/e if $UPPERCASE_SENT;
+ $text =~ s/^([\.:\?\!;\s]*)([[:alpha:]])/ucsecondarg($1, $2)/e if $UPPERCASE_SENT;
return $text;
}
diff --git a/external/truecase.perl b/external/truecase.perl
index 0a4d366..b724510 100755
--- a/external/truecase.perl
+++ b/external/truecase.perl
@@ -28,7 +28,8 @@ while(<MODEL>) {
close(MODEL);
my %SENTENCE_END = ("."=>1,":"=>1,"?"=>1,"!"=>1);
-my %DELAYED_SENTENCE_START = ("("=>1,"["=>1,"\""=>1,"'"=>1,"&apos;"=>1,"&quot;"=>1,"&#91;"=>1,"&#93;"=>1);
+#my %DELAYED_SENTENCE_START = ("("=>1,"["=>1,"\""=>1,"'"=>1,"&apos;"=>1,"&quot;"=>1,"&#91;"=>1,"&#93;"=>1);
+my %DELAYED_SENTENCE_START = ("\""=>1,"'"=>1,"&apos;"=>1,"&quot;"=>1);
while(<STDIN>) {
chop;