diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-09-15 23:00:01 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-09-15 23:00:01 -0400 |
commit | 3822a2063e36b6ced948e5c22910a373c6c691b2 (patch) | |
tree | 208dab2df693adf2f8bdb9561eb6dbd92e2ac5c6 /corpus/support/tokenizer.pl | |
parent | 24e0ff67a5e63ff987755ef128d2b056fbdcd603 (diff) |
migrate to new Cython version
Diffstat (limited to 'corpus/support/tokenizer.pl')
-rwxr-xr-x | corpus/support/tokenizer.pl | 2 |
1 files changed, 1 insertions, 1 deletions
```diff
diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index f57bc87a..aa285be4 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -386,7 +386,7 @@ sub deep_proc_token {
     }
 
     ##### step 1: separate by punct T2 on the boundary
-    my $t2 = '\`|\!|\@|\+|\=|\[|\]|\<|\>|\||\(|\)|\{|\}|\?|\"|;';
+    my $t2 = '\`|\!|\@|\+|\=|\[|\]|\<|\>|\||\(|\)|\{|\}|\?|\"|;|●|○';
     if($line =~ s/^(($t2)+)/$1 /){
         return proc_line($line);
     }
```