summaryrefslogtreecommitdiff
path: root/training/dep-reorder
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2012-11-18 13:35:42 -0500
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2012-11-18 13:35:42 -0500
commit1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 (patch)
tree33e5f3aa5abff1f41314cf8f6afbd2c2c40e4bfd /training/dep-reorder
parent7c4665949fb93fb3de402e4ce1d19bef67850d05 (diff)
major restructure of the training code
Diffstat (limited to 'training/dep-reorder')
-rwxr-xr-xtraining/dep-reorder/conll2reordering-forest.pl65
-rw-r--r--training/dep-reorder/george.conll4
-rwxr-xr-xtraining/dep-reorder/scripts/conll2simplecfg.pl57
3 files changed, 0 insertions, 126 deletions
diff --git a/training/dep-reorder/conll2reordering-forest.pl b/training/dep-reorder/conll2reordering-forest.pl
deleted file mode 100755
index 3cd226be..00000000
--- a/training/dep-reorder/conll2reordering-forest.pl
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
-my $FIRST_CONV = "$script_dir/scripts/conll2simplecfg.pl";
-my $CDEC = "$script_dir/../../decoder/cdec";
-
-our $tfile1 = "grammar1.$$";
-our $tfile2 = "text.$$";
-
-die "Usage: $0 parses.conll\n" unless scalar @ARGV == 1;
-open C, "<$ARGV[0]" or die "Can't read $ARGV[0]: $!";
-
-END { unlink $tfile1; unlink "$tfile1.cfg"; unlink $tfile2; }
-
-my $first = 1;
-open T, ">$tfile1" or die "Can't write $tfile1: $!";
-my $lc = 0;
-my $flag = 0;
-my @words = ();
-while(<C>) {
- print T;
- chomp;
- if (/^$/) {
- if ($first) { $first = undef; } else { if ($flag) { print "\n"; $flag = 0; } }
- $first = undef;
- close T;
- open SO, ">$tfile2" or die "Can't write $tfile2: $!";
- print SO "@words\n";
- close SO;
- @words=();
- `$FIRST_CONV < $tfile1 > $tfile1.cfg`;
- if ($? != 0) {
- die "Error code: $?";
- }
- my $cfg = `$CDEC -n -S 10000 -f scfg -g $tfile1.cfg -i $tfile2 --show_cfg_search_space 2>/dev/null`;
- if ($? != 0) {
- die "Error code: $?";
- }
- my @rules = split /\n/, $cfg;
- shift @rules; # get rid of output
- for my $rule (@rules) {
- my ($lhs, $f, $e, $feats) = split / \|\|\| /, $rule;
- $f =~ s/,\d\]/\]/g;
- $feats = 'TOP=1' unless $feats;
- if ($lhs =~ /\[Goal_\d+\]/) { $lhs = '[S]'; }
- print "$lhs ||| $f ||| $feats\n";
- if ($e eq '[1] [2]') {
- my ($a, $b) = split /\s+/, $f;
- $feats =~ s/=1$//;
- my ($x, $y) = split /_/, $feats;
- print "$lhs ||| $b $a ||| ${y}_$x=1\n";
- }
- $flag = 1;
- }
- open T, ">$tfile1" or die "Can't write $tfile1: $!";
- $lc = -1;
- } else {
- my ($ind, $word, @dmmy) = split /\s+/;
- push @words, $word;
- }
- $lc++;
-}
-close T;
-
diff --git a/training/dep-reorder/george.conll b/training/dep-reorder/george.conll
deleted file mode 100644
index 7eebb360..00000000
--- a/training/dep-reorder/george.conll
+++ /dev/null
@@ -1,4 +0,0 @@
-1 George _ GEORGE _ _ 2 X _ _
-2 hates _ HATES _ _ 0 X _ _
-3 broccoli _ BROC _ _ 2 X _ _
-
diff --git a/training/dep-reorder/scripts/conll2simplecfg.pl b/training/dep-reorder/scripts/conll2simplecfg.pl
deleted file mode 100755
index b101347a..00000000
--- a/training/dep-reorder/scripts/conll2simplecfg.pl
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-# 1 在 _ 10 _ _ 4 X _ _
-# 2 门厅 _ 3 _ _ 1 X _ _
-# 3 下面 _ 23 _ _ 4 X _ _
-# 4 。 _ 45 _ _ 0 X _ _
-
-my @ldeps;
-my @rdeps;
-@ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; }
-@rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; }
-my $rootcat = 0;
-my @cats = ('S');
-my $len = 0;
-my @noposcats = ('S');
-while(<>) {
- chomp;
- if (/^\s*$/) {
- write_cfg($len);
- $len = 0;
- @cats=('S');
- @noposcats = ('S');
- @ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; }
- @rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; }
- next;
- }
- $len++;
- my ($pos, $word, $d1, $xcat, $d2, $d3, $headpos, $deptype) = split /\s+/;
- my $cat = "C$xcat";
- my $catpos = $cat . "_$pos";
- push @cats, $catpos;
- push @noposcats, $cat;
- print "[$catpos] ||| $word ||| $word ||| Word=1\n";
- if ($headpos == 0) { $rootcat = $pos; }
- if ($pos < $headpos) {
- push @{$ldeps[$headpos]}, $pos;
- } else {
- push @{$rdeps[$headpos]}, $pos;
- }
-}
-
-sub write_cfg {
- my $len = shift;
- for (my $i = 1; $i <= $len; $i++) {
- my @lds = @{$ldeps[$i]};
- for my $ld (@lds) {
- print "[$cats[$i]] ||| [$cats[$ld],1] [$cats[$i],2] ||| [1] [2] ||| $noposcats[$ld]_$noposcats[$i]=1\n";
- }
- my @rds = @{$rdeps[$i]};
- for my $rd (@rds) {
- print "[$cats[$i]] ||| [$cats[$i],1] [$cats[$rd],2] ||| [1] [2] ||| $noposcats[$i]_$noposcats[$rd]=1\n";
- }
- }
- print "[S] ||| [$cats[$rootcat],1] ||| [1] ||| TOP=1\n";
-}
-