diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-03-18 02:05:25 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-03-18 02:05:25 -0400 |
commit | 3a5aeb67de3d7156e77ee94625ed3714117d3b43 (patch) | |
tree | fc4aa0ffa2a414d333637f099943106ef459e24d /corpus/support | |
parent | 766629370bbecfb05513aed9cd16f783be5e1543 (diff) |
chris edits
Diffstat (limited to 'corpus/support')
-rwxr-xr-x | corpus/support/tokenizer.pl | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index 7771201f..f57bc87a 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -240,6 +240,10 @@ sub proc_token {
         return $token;
     }
 
+    if($token =~ /^\d+(.\d+)+(亿|百万|万|千)?$/){
+        return $token;
+    }
+
     ## 1,234,345.34
     if($token =~ /^\d+(\.\d{3})*,\d+$/){
         ## number