diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-10-24 15:31:16 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-10-24 15:31:16 -0400 |
commit | 0968a3e56e5fb8d3afee2b1c4904d76160b0fb17 (patch) | |
tree | 1bdf3ae8c1b5c90f26f3ea114f675ad73d66af4d /corpus | |
parent | cba04ba80d7fff5cdf6f80c32b83c0ac3bb7b1b9 (diff) |
conll2cdec conversion
Diffstat (limited to 'corpus')
-rwxr-xr-x | corpus/conll2cdec.pl | 39 |
1 files changed, 39 insertions, 0 deletions
#!/usr/bin/perl -w
# conll2cdec.pl: convert CoNLL-style column data into cdec <seg> lines.
#
# Input (files named on the command line, or STDIN): one token per line as
# whitespace-separated columns -- the token first, the label last, and any
# columns in between are features.  A blank line ends a sentence.
#
# Output (STDOUT): one line per sentence,
#   <seg id="N" feat1="..." feat2="..."> tokens ||| labels </seg>
# where featK is the K-th feature column of every token, space-joined.
use strict;
use warnings;

# Render one sentence as a <seg> line.
#   $id        - zero-based sentence number
#   $num_feats - number of feature columns per token
#   $tokens    - array ref of tokens
#   $labels    - array ref of labels
#   $feat_rows - array ref holding one array ref of feature values per token
# Returns the formatted line, trailing newline included.
sub _format_segment {
    my ($id, $num_feats, $tokens, $labels, $feat_rows) = @_;

    my $seg = "<seg id=\"$id\"";
    for my $j (0 .. $num_feats - 1) {
        my @column;
        for my $row (@$feat_rows) {
            # Work on a copy; a double quote would end the attribute early.
            (my $sym = $row->[$j]) =~ tr/"/'/;
            push @column, $sym;
        }
        $seg .= ' feat' . ($j + 1) . "=\"@column\"";
    }
    return $seg . "> @$tokens ||| @$labels </seg>\n";
}

# Read column data from $in and print one <seg> line per sentence to $out.
#   $in  - readable filehandle
#   $out - writable filehandle
# Dies if a token line has a different number of feature columns than the
# first token line read.
sub convert {
    my ($in, $out) = @_;

    my (@tokens, @labels, @feat_rows);
    my $num_feats;      # fixed by the first token line, then enforced
    my $next_id = 0;

    my $flush = sub {
        # $num_feats is still undef if a blank line precedes all data.
        my $n = defined $num_feats ? $num_feats : 0;
        print {$out} _format_segment($next_id++, $n,
                                     \@tokens, \@labels, \@feat_rows);
        @tokens    = ();
        @labels    = ();
        @feat_rows = ();
    };

    while (my $line = <$in>) {
        chomp $line;
        if ($line =~ /^\s*$/) {
            # Every blank line closes a segment, even an empty one, so
            # segment ids stay in step with the blank lines of the input.
            $flush->();
            next;
        }

        my ($token, @feats) = split /\s+/, $line;
        my $label = pop @feats;
        $num_feats = scalar @feats unless defined $num_feats;
        die "expected $num_feats feature column(s) but found " . scalar(@feats)
            unless $num_feats == scalar @feats;

        push @tokens,    $token;
        push @labels,    $label;
        push @feat_rows, \@feats;
    }

    # Input without a trailing blank line must not lose its last sentence.
    $flush->() if @tokens;
    return;
}

# Run as a script: behave like a filter over <> (ARGV files or STDIN).
convert(\*ARGV, \*STDOUT) unless caller;

1;