From 118013befdcdcaf96c64657439c441f0108fbdcc Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Mon, 15 Sep 2014 23:00:01 -0400
Subject: migrate to new Cython version

---
 corpus/support/tokenizer.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'corpus/support/tokenizer.pl')

diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index f57bc87a..aa285be4 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -386,7 +386,7 @@ sub deep_proc_token {
     }
 
     ##### step 1: separate by punct T2 on the boundary
-    my $t2 = '\`|\!|\@|\+|\=|\[|\]|\<|\>|\||\(|\)|\{|\}|\?|\"|;';
+    my $t2 = '\`|\!|\@|\+|\=|\[|\]|\<|\>|\||\(|\)|\{|\}|\?|\"|;|●|○';
     if($line =~ s/^(($t2)+)/$1 /){
         return proc_line($line);
     }
-- 
cgit v1.2.3