summary | refs | log | tree | commit | diff
path: root/corpus/support/tokenizer.pl
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-09-15 23:00:01 -0400
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-09-15 23:00:01 -0400
commit 118013befdcdcaf96c64657439c441f0108fbdcc (patch)
tree f19b3a0c9d91f6eadbfbca21740abe00ad93aa30 /corpus/support/tokenizer.pl
parent 604b4464e7e3f8b90145c7039d5672e6638c0dd3 (diff)
migrate to new Cython version
Diffstat (limited to 'corpus/support/tokenizer.pl')
-rwxr-xr-x corpus/support/tokenizer.pl 2
1 files changed, 1 insertions, 1 deletions
diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index f57bc87a..aa285be4 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -386,7 +386,7 @@ sub deep_proc_token {
}
##### step 1: separate by punct T2 on the boundary
- my $t2 = '\`|\!|\@|\+|\=|\[|\]|\<|\>|\||\(|\)|\{|\}|\?|\"|;';
+ my $t2 = '\`|\!|\@|\+|\=|\[|\]|\<|\>|\||\(|\)|\{|\}|\?|\"|;|●|○';
if($line =~ s/^(($t2)+)/$1 /){
return proc_line($line);
}