summaryrefslogtreecommitdiff
path: root/mteval/mbr.pl
blob: ff7763d81a33c444c2d7bf332ecff7dbdac9b6e1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/perl -w
#parallelize mbr computation from k-best list
#Example usage : ./mbr.pl <kbest input file> <mbr output directory>
use strict;
use File::Temp;
use FileHandle;
use Cwd qw(getcwd);

sub create_qsub;    # forward declaration; the definition follows the main script

# Resolve the directory that contains this script at compile time, and add
# it (plus its sibling "environment" directory) to the module search path.
my $SCRIPT_DIR;
BEGIN {
    use Cwd qw/ abs_path /;
    use File::Basename;
    $SCRIPT_DIR = dirname(abs_path($0));
    push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment";
}

# The MBR decoder binary is expected to live next to this script.
my $MBR = "$SCRIPT_DIR/mbr_kbest";

die "Cannot find an executable MBR decoder at $MBR\n" unless -x $MBR;

# First argument: the k-best list to process. Any arguments left in @ARGV
# after the two positional ones are passed through to mbr_kbest.
my $kbest = shift @ARGV;
die "Usage: $0 <kbest input file> <mbr output directory> [mbr_kbest options]\n"
    unless defined $kbest;
die "First parameter must be a kbest file!" unless -r $kbest;

# Create the directory that stores the separate per-sentence kbest files and
# the MBR output. The directory must not exist yet: an existing path is
# rejected rather than reused.
my $dir = shift @ARGV;
die "Second parameter must be a directory!"
    unless defined $dir && length $dir;
die "$dir exists\n" if -e $dir;
mkdir $dir or die "Couldn't create $dir: $!";
die "Second parameter must be a directory!" unless -d $dir;

#my @a = grep { /^--weights$/ } @ARGV;
#die "Please specify a weights file with --weights" unless scalar @a;

# Read the whole k-best list into memory. A failed open is fatal: there is
# no enclosing loop here, so it must be reported with die rather than next.
open my $score_fh, '<', $kbest or die "Couldn't read $kbest: $!";
my @lines = <$score_fh>;
close $score_fh;
my $num = @lines;       # number of k-best lines read
my %sent_id=();         # sentence ids seen so far while splitting
# Working directory at launch, exactly as printed by `pwd` (its trailing
# newline included); create_qsub writes it into every job script.
my $cdir = `pwd`;

my %ids=();             # qsub job ids that have been submitted

my $fh;                 # handle of the per-sentence file currently being written
my $file_count=0;       # index of the current per-sentence file

# Temporary directory that stores the generated qsub job scripts.
# tempdir() picks the unique name and creates the directory in one step
# (and croaks on failure), which avoids the window between choosing a name
# with tempnam() and calling mkdir on it. No CLEANUP is requested, so the
# directory is left in place when the script exits.
my $fn = File::Temp::tempdir("mbr-XXXXXXXX", DIR => "/tmp");

# Split the kbest list into per-sentence files, $dir/kbest.mbr.<n>, and
# submit one qsub job per file as soon as that file is complete.
#
# The sentence id is the text before the first space of a line. A line whose
# id has already been seen is appended to the file that is currently open,
# so the input is expected to be grouped by sentence id.
foreach my $line (@lines)
{
    my ($sentid) = split(/ /, $line, 2);

    if ($sent_id{$sentid})
    {
        print $fh $line if defined $fh;
        next;
    }

    # First line of a new sentence: finish and submit the previous file.
    if (defined $fh)
    {
        $fh->close
            or die "Couldn't finish writing $dir/kbest.mbr.$file_count: $!";
        #create qsub entry
        create_qsub("$file_count");
        $file_count++;
    }

    my $split_file = "$dir/kbest.mbr.$file_count";
    $fh = FileHandle->new($split_file, "w")
        or die "Couldn't create $split_file: $!";
    print $fh $line;
    $sent_id{$sentid}++;
}

# With an empty k-best list no file was ever opened; stop with a clear
# message instead of failing on a method call on an undefined handle.
die "No k-best entries were read; nothing to submit\n" unless defined $fh;

$fh->close
    or die "Couldn't finish writing $dir/kbest.mbr.$file_count: $!";
#create last qsub entry
create_qsub("$file_count");
$file_count++;


# Short pause before polling starts.
sleep 1;

print STDERR "Waiting...\n";

# Poll qstat every five seconds until none of its lines refers to a job id
# recorded in %ids. A line is matched on the text before its first ".".
my $flag;
do {
  sleep 5;
  my $listing = `qstat`;
  my $still_listed = grep {
    my ($job_id) = split /\./, $_;
    $ids{$job_id};
  } split /\n/, $listing;
  $flag = $still_listed ? 1 : undef;
} while ($flag);

# Consolidate the MBR output into one kbest stream: copy each per-sentence
# result file, in file-index order, to standard output. A missing or
# unreadable result file is fatal.
for my $i (0 .. $file_count - 1) {
  my $result_file = "$dir/$i.mbr-txt";
  # Lexical handle and three-argument open: the path is taken literally and
  # the handle is not shared with any other part of the script.
  open my $result_fh, '<', $result_file
    or die "Couldn't read $result_file: $!";
  while (my $out_line = <$result_fh>) {
    print $out_line;
  }
  close $result_fh;
}

#`rm -rf $fn`;


# create_qsub($file_n)
#
# Writes the job script "$fn/$file_n.mbr" and submits it with qsub. The
# script changes to the directory held in $cdir and runs $MBR with
# "$dir/kbest.mbr.$file_n" on standard input, the remaining @ARGV followed
# by -L as arguments, and "$dir/$file_n.mbr-txt" as standard output.
#
# The part of qsub's output before the first "." is stored as a key of %ids,
# which the polling loop compares against qstat output.
#
# Dies if the job script cannot be written or if qsub returns no job id.
# Returns 1 on success.
sub create_qsub {

    my $file_n = shift;
    my $sfn = "$fn/$file_n.mbr";

    # $cdir may still carry the trailing newline of `pwd`; strip it from a
    # copy and terminate the cd line explicitly so the script is well formed
    # either way.
    chomp(my $work_dir = $cdir);

    open my $script_fh, '>', $sfn or die "Couldn't create $sfn: $!";
    print {$script_fh} "cd $work_dir\n";
    print {$script_fh} "$MBR < $dir/kbest.mbr.$file_n @ARGV -L > $dir/$file_n.mbr-txt\n";
    close $script_fh or die "Couldn't finish writing $sfn: $!";

    # `sleep 10`;
    my $o = `qsub -q batch -l pmem=1000mb,walltime=06:00:00 $sfn -k n -e /dev/null`;
    #my $o = `qsub -q batch -l pmem=1000mb,walltime=06:00:00 $sfn -k n`;

    # An empty or undefined id must never be recorded in %ids: it would match
    # blank qstat lines and could keep the polling loop waiting forever.
    my ($job_id) = split /\./, (defined $o ? $o : '');
    die "qsub did not return a job id for $sfn (wait status $?)\n"
        unless defined $job_id && $job_id =~ /\S/;
    $ids{$job_id} = 1;

    return 1;
}