#!/usr/bin/perl -w
#
# mbr.pl - parallelize MBR computation from a k-best list.
#
# Usage: ./mbr.pl KBEST_FILE NEW_DIR [options passed through to mbr_kbest]
#
#   KBEST_FILE  readable k-best list; the first space-separated field of each
#               line is the sentence id.
#   NEW_DIR     directory that must NOT exist yet; it is created and receives
#               the per-sentence k-best files (kbest.mbr.N) and the per-job
#               outputs (N.mbr-txt).
#
# The k-best list is split into one file per sentence, one qsub job running
# mbr_kbest is submitted per file, the script polls qstat until none of the
# submitted jobs is listed any more, and finally the per-job outputs are
# concatenated, in order, onto STDOUT.
#
# Provenance: mteval/mbr.pl, "Add script to parallelize mbr_kbest",
# vladimir.eidelman, 2010-09-05 (ws10smt svn trunk r642).

use strict;
use warnings;
use Cwd ();
use File::Temp qw(tempdir);
use FileHandle;

sub create_qsub;

my $MBR = "/chomes/vlad/ws10smt/mteval/mbr_kbest";

die "Cannot execute $MBR\n" unless -x $MBR;

my $kbest = shift @ARGV;
die "First parameter must be a kbest file!"
    unless defined $kbest && -r $kbest;

# Create the directory that stores the separate kbest files and the mbr output.
my $dir = shift @ARGV;
die "Second parameter must be a directory!"
    unless defined $dir && length $dir;
die "$dir exists\n" if -e $dir;
mkdir $dir or die "Couldn't create $dir: $!";
die "Second parameter must be a directory!" unless -d $dir;

#my @a = grep { /^--weights$/ } @ARGV;
#die "Please specify a weights file with --weights" unless scalar @a;

# Each job script starts with "cd $cdir" so that a relative $dir still
# resolves on the execution host.
my $cdir = Cwd::getcwd();
die "Couldn't determine the current directory: $!" unless defined $cdir;

my %sent_id = ();    # sentence ids that already have a per-sentence file
my %ids     = ();    # job identifiers (qsub output up to the first '.')

my $fh;              # handle of the per-sentence file currently being written
my $file_count = 0;  # index of the current / next per-sentence file

# Temporary directory holding the generated qsub job scripts. It is left in
# place on exit (the original cleanup was commented out as well).
my $fn = tempdir("mbr-XXXXXX", DIR => "/tmp");

# Split the kbest list into per-sentence files, submitting a job for each
# file as soon as it is complete.
open my $score, '<', $kbest or die "Couldn't read $kbest: $!";
while (my $line = <$score>) {
    my @parts  = split(/ /, $line);
    my $sentid = defined $parts[0] ? $parts[0] : '';

    if ($sent_id{$sentid}) {
        # Id seen before: the line goes to whichever file is currently open.
        print {$fh} $line if defined $fh;
        next;
    }

    # New sentence id: finish the previous file and submit its job.
    if (defined $fh) {
        $fh->close or die "Couldn't close $dir/kbest.mbr.$file_count: $!";
        create_qsub("$file_count");
        $file_count++;
    }

    my $part = "$dir/kbest.mbr.$file_count";
    $fh = FileHandle->new($part, "w") or die "Couldn't create $part: $!";
    print {$fh} $line;
    $sent_id{$sentid}++;
}
close $score;

# An empty k-best list never opens a per-sentence file; there is nothing to
# submit or to consolidate.
die "No k-best entries found in $kbest\n" unless defined $fh;

# Submit the job for the last sentence.
$fh->close or die "Couldn't close $dir/kbest.mbr.$file_count: $!";
create_qsub("$file_count");
$file_count++;

sleep 1;

print STDERR "Waiting...\n";

# Poll qstat every 5 seconds until none of our job ids is listed.
my $flag;
do {
    sleep 5;
    $flag = undef;
    my $stat = `qstat`;
    $stat = '' unless defined $stat;
    for my $ln (split /\n/, $stat) {
        my ($x) = split /\./, $ln;
        # Blank lines yield no fields; they must not match any job id.
        $flag = 1 if defined $x && $ids{$x};
    }
} while ($flag);

# Consolidate the mbr output into one kbest stream on STDOUT.
for my $i (0 .. $file_count - 1) {
    my $result = "$dir/$i.mbr-txt";
    open my $in, '<', $result or die "Couldn't read $result: $!";
    while (my $out_line = <$in>) {
        print $out_line;
    }
    close $in;
}

#`rm -rf $fn`;

# create_qsub(FILE_N)
#
# Writes the job script $fn/FILE_N.mbr, which changes to the submission
# directory and runs mbr_kbest on $dir/kbest.mbr.FILE_N (with the remaining
# command-line arguments and -L), writing to $dir/FILE_N.mbr-txt. The script
# is submitted with qsub and the part of qsub's output before the first '.'
# is recorded in %ids so the wait loop can recognise the job in qstat.
# Dies if the script cannot be written or qsub prints no job identifier.
sub create_qsub {
    my $file_n = shift;
    my $sfn    = "$fn/$file_n.mbr";

    open my $job, '>', $sfn or die "Couldn't create $sfn: $!";
    print {$job} "cd $cdir\n";
    print {$job} "$MBR < $dir/kbest.mbr.$file_n @ARGV -L > $dir/$file_n.mbr-txt\n";
    close $job or die "Couldn't write $sfn: $!";

    # `sleep 10`;
    my $o = `qsub -q batch -l pmem=1000mb,walltime=06:00:00 $sfn -k n -e /dev/null`;
    #my $o = `qsub -q batch -l pmem=1000mb,walltime=06:00:00 $sfn -k n`;

    # Without a job id the wait loop could never track this job, and an empty
    # id would match blank qstat lines forever.
    die "qsub returned no job id for $sfn\n"
        unless defined $o && $o =~ /\S/;

    my ($x) = split /\./, $o;
    $ids{$x} = 1;

    return;
}