diff options
| -rw-r--r-- | TODO | 1 | ||||
| -rwxr-xr-x | external/detokenizer.perl | 3 | ||||
| -rwxr-xr-x | external/truecase.perl | 3 | 
3 files changed, 4 insertions, 3 deletions
@@ -1 +0,0 @@
-fix upper case if first thing is a comma or .
diff --git a/external/detokenizer.perl b/external/detokenizer.perl
index a8de7e8..dc12609 100755
--- a/external/detokenizer.perl
+++ b/external/detokenizer.perl
@@ -192,7 +192,8 @@ sub detokenize {
 	#add trailing break
 	$text .= "\n" unless $text =~ /\n$/;
 
-        $text =~ s/^([[:punct:]\s]*)([[:alpha:]])/ucsecondarg($1, $2)/e if $UPPERCASE_SENT;
+  #$text =~ s/^([[:punct:]\s]*)([[:alpha:]])/ucsecondarg($1, $2)/e if $UPPERCASE_SENT;
+  $text =~ s/^([\.:\?\!;\s]*)([[:alpha:]])/ucsecondarg($1, $2)/e if $UPPERCASE_SENT;
 
 	return $text;
 }
diff --git a/external/truecase.perl b/external/truecase.perl
index 0a4d366..b724510 100755
--- a/external/truecase.perl
+++ b/external/truecase.perl
@@ -28,7 +28,8 @@ while(<MODEL>) {
 close(MODEL);
 
 my %SENTENCE_END = ("."=>1,":"=>1,"?"=>1,"!"=>1);
-my %DELAYED_SENTENCE_START = ("("=>1,"["=>1,"\""=>1,"'"=>1,"&apos;"=>1,"&quot;"=>1,"&#91;"=>1,"&#93;"=>1);
+#my %DELAYED_SENTENCE_START = ("("=>1,"["=>1,"\""=>1,"'"=>1,"&apos;"=>1,"&quot;"=>1,"&#91;"=>1,"&#93;"=>1);
+my %DELAYED_SENTENCE_START = ("\""=>1,"'"=>1,"&apos;"=>1,"&quot;"=>1);
 
 while(<STDIN>) {
   chop;
