diff options
Diffstat (limited to 'corpus')
| -rwxr-xr-x | corpus/support/tokenizer.pl | 13 | 
1 file changed, 3 insertions, 10 deletions
diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index 475b9a1c..7771201f 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -666,10 +666,10 @@ sub deep_proc_token {
 	    return $line;
 	}
 
-	if($line =~ /^(([a-z]\.)+)(\.*)$/i){
+        if ($line =~ /^(([a-z]|ए|बी|सी|डी|ई|एफ|जी|एच|आई|जे|के|एल|एम|एन|ओ|पी|क़यू|आर|एस|टी|यू|वी|डबल्यू|एक्स|वाई|ज़ेड|ज़ी)(\.([a-z]|ए|बी|सी|डी|ई|एफ|जी|एच|आई|जे|के|एल|एम|एन|ओ|पी|क़यू|आर|एस|टी|यू|वी|डबल्यू|एक्स|वाई|ज़ेड|ज़ी))+)(\.?)(\.*)$/i){
 	    ## I.B.M.
-	    my $t1 = $1;
-	    my $t3 = $3;
+	    my $t1 = $1 . $5;
+	    my $t3 = $6;
 	    return $t1 . " ". proc_token($t3);
 	}
 
@@ -703,10 +703,3 @@ sub deep_proc_token {
     return $line;
 }
 
-
-
-
-
-
-		   
-
