From ed56625e5edeadbe9297680b07e269c42b7ea420 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Thu, 27 Feb 2014 19:45:08 -0500
Subject: ptb to normal

---
 corpus/support/quote-norm.pl | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'corpus/support/quote-norm.pl')

diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl
index 1d9bb96f..33604027 100755
--- a/corpus/support/quote-norm.pl
+++ b/corpus/support/quote-norm.pl
@@ -11,6 +11,15 @@ while(<STDIN>) {
   # Delete control characters:
   s/[\x{00}-\x{1f}]//g;
 
+  # PTB --> normal
+  s/-LRB-/(/g;
+  s/-RRB-/)/g;
+  s/-LSB-/[/g;
+  s/-RSB-/]/g;
+  s/-LCB-/{/g;
+  s/-RCB-/}/g;
+  s/ gon na / gonna /g;
+
   # Regularize named HTML/XML escapes:
   s/&\s*lt\s*;/</gi;   # HTML opening angle bracket
   s/&\s*gt\s*;/>/gi;   # HTML closing angle bracket
-- 
cgit v1.2.3