From 1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:35:42 -0500 Subject: major restructure of the training code --- training/dep-reorder/conll2reordering-forest.pl | 65 ------------------------- training/dep-reorder/george.conll | 4 -- training/dep-reorder/scripts/conll2simplecfg.pl | 57 ---------------------- 3 files changed, 126 deletions(-) delete mode 100755 training/dep-reorder/conll2reordering-forest.pl delete mode 100644 training/dep-reorder/george.conll delete mode 100755 training/dep-reorder/scripts/conll2simplecfg.pl (limited to 'training/dep-reorder') diff --git a/training/dep-reorder/conll2reordering-forest.pl b/training/dep-reorder/conll2reordering-forest.pl deleted file mode 100755 index 3cd226be..00000000 --- a/training/dep-reorder/conll2reordering-forest.pl +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; } -my $FIRST_CONV = "$script_dir/scripts/conll2simplecfg.pl"; -my $CDEC = "$script_dir/../../decoder/cdec"; - -our $tfile1 = "grammar1.$$"; -our $tfile2 = "text.$$"; - -die "Usage: $0 parses.conll\n" unless scalar @ARGV == 1; -open C, "<$ARGV[0]" or die "Can't read $ARGV[0]: $!"; - -END { unlink $tfile1; unlink "$tfile1.cfg"; unlink $tfile2; } - -my $first = 1; -open T, ">$tfile1" or die "Can't write $tfile1: $!"; -my $lc = 0; -my $flag = 0; -my @words = (); -while() { - print T; - chomp; - if (/^$/) { - if ($first) { $first = undef; } else { if ($flag) { print "\n"; $flag = 0; } } - $first = undef; - close T; - open SO, ">$tfile2" or die "Can't write $tfile2: $!"; - print SO "@words\n"; - close SO; - @words=(); - `$FIRST_CONV < $tfile1 > $tfile1.cfg`; - if ($? != 0) { - die "Error code: $?"; - } - my $cfg = `$CDEC -n -S 10000 -f scfg -g $tfile1.cfg -i $tfile2 --show_cfg_search_space 2>/dev/null`; - if ($? != 0) { - die "Error code: $?"; - } - my @rules = split /\n/, $cfg; - shift @rules; # get rid of output - for my $rule (@rules) { - my ($lhs, $f, $e, $feats) = split / \|\|\| /, $rule; - $f =~ s/,\d\]/\]/g; - $feats = 'TOP=1' unless $feats; - if ($lhs =~ /\[Goal_\d+\]/) { $lhs = '[S]'; } - print "$lhs ||| $f ||| $feats\n"; - if ($e eq '[1] [2]') { - my ($a, $b) = split /\s+/, $f; - $feats =~ s/=1$//; - my ($x, $y) = split /_/, $feats; - print "$lhs ||| $b $a ||| ${y}_$x=1\n"; - } - $flag = 1; - } - open T, ">$tfile1" or die "Can't write $tfile1: $!"; - $lc = -1; - } else { - my ($ind, $word, @dmmy) = split /\s+/; - push @words, $word; - } - $lc++; -} -close T; - diff --git a/training/dep-reorder/george.conll b/training/dep-reorder/george.conll deleted file mode 100644 index 7eebb360..00000000 --- a/training/dep-reorder/george.conll +++ /dev/null @@ -1,4 +0,0 @@ -1 George _ GEORGE _ _ 2 X _ _ -2 hates _ HATES _ _ 0 X _ _ -3 broccoli _ BROC _ _ 2 X _ _ - diff --git a/training/dep-reorder/scripts/conll2simplecfg.pl b/training/dep-reorder/scripts/conll2simplecfg.pl deleted file mode 100755 index b101347a..00000000 --- a/training/dep-reorder/scripts/conll2simplecfg.pl +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -# 1 在 _ 10 _ _ 4 X _ _ -# 2 门厅 _ 3 _ _ 1 X _ _ -# 3 下面 _ 23 _ _ 4 X _ _ -# 4 。 _ 45 _ _ 0 X _ _ - -my @ldeps; -my @rdeps; -@ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; } -@rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; } -my $rootcat = 0; -my @cats = ('S'); -my $len = 0; -my @noposcats = ('S'); -while(<>) { - chomp; - if (/^\s*$/) { - write_cfg($len); - $len = 0; - @cats=('S'); - @noposcats = ('S'); - @ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; } - @rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; } - next; - } - $len++; - my ($pos, $word, $d1, $xcat, $d2, $d3, $headpos, $deptype) = split /\s+/; - my $cat = "C$xcat"; - my $catpos = $cat . "_$pos"; - push @cats, $catpos; - push @noposcats, $cat; - print "[$catpos] ||| $word ||| $word ||| Word=1\n"; - if ($headpos == 0) { $rootcat = $pos; } - if ($pos < $headpos) { - push @{$ldeps[$headpos]}, $pos; - } else { - push @{$rdeps[$headpos]}, $pos; - } -} - -sub write_cfg { - my $len = shift; - for (my $i = 1; $i <= $len; $i++) { - my @lds = @{$ldeps[$i]}; - for my $ld (@lds) { - print "[$cats[$i]] ||| [$cats[$ld],1] [$cats[$i],2] ||| [1] [2] ||| $noposcats[$ld]_$noposcats[$i]=1\n"; - } - my @rds = @{$rdeps[$i]}; - for my $rd (@rds) { - print "[$cats[$i]] ||| [$cats[$i],1] [$cats[$rd],2] ||| [1] [2] ||| $noposcats[$i]_$noposcats[$rd]=1\n"; - } - } - print "[S] ||| [$cats[$rootcat],1] ||| [1] ||| TOP=1\n"; -} - -- cgit v1.2.3