summary | refs | log | tree | commit | diff
path: root/corpus/support/tokenizer.pl
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-03-18 02:05:25 -0400
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-03-18 02:05:25 -0400
commit 3a5aeb67de3d7156e77ee94625ed3714117d3b43 (patch)
tree fc4aa0ffa2a414d333637f099943106ef459e24d /corpus/support/tokenizer.pl
parent 766629370bbecfb05513aed9cd16f783be5e1543 (diff)
chris edits
Diffstat (limited to 'corpus/support/tokenizer.pl')
-rwxr-xr-x corpus/support/tokenizer.pl 4
1 files changed, 4 insertions, 0 deletions
diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index 7771201f..f57bc87a 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -240,6 +240,10 @@ sub proc_token {
return $token;
}
+ if($token =~ /^\d+(.\d+)+(亿|百万|万|千)?$/){
+ return $token;
+ }
+
## 1,234,345.34
if($token =~ /^\d+(\.\d{3})*,\d+$/){
## number