summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author vladimir.eidelman <vladimir.eidelman@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-09-05 05:17:18 +0000
committer vladimir.eidelman <vladimir.eidelman@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-09-05 05:17:18 +0000
commit 7f248cf66ffdcfbf7a857473b665b4090454ba44 (patch)
tree c21ab7c84eef42664cbb7386f06749b1a46116a0
parent 2ed61b40bd0f3f1c377a415df8af4fab58122a52 (diff)
Add script to parallelize mbr_kbest
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@642 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x mteval/mbr.pl 116
1 files changed, 116 insertions, 0 deletions
diff --git a/mteval/mbr.pl b/mteval/mbr.pl
new file mode 100755
index 00000000..798da2d1
--- /dev/null
+++ b/mteval/mbr.pl
@@ -0,0 +1,116 @@
+#!/usr/bin/perl -w
+#parallelize mbr computation from k-best list
+#Example usage : ./mbr.pl <kbest input file> <mbr output directory> [mbr_kbest options]
+
+use strict;
+use File::Temp;
+use FileHandle;
+
+sub create_qsub;    #forward declaration, defined at the end of the file
+
+my $MBR = "/chomes/vlad/ws10smt/mteval/mbr_kbest";    #binary run by every job
+
+die "Cannot execute $MBR\n" unless -x $MBR;
+die "Usage: $0 <kbest input file> <mbr output directory> [mbr_kbest options]\n"
+    unless @ARGV >= 2;
+my $kbest = shift @ARGV;
+die "First parameter must be a kbest file!" unless -r $kbest;
+
+#create directory to store separate kbest files and mbr output
+my $dir = shift @ARGV;
+die "$dir exists\n" if -e $dir;    #an existing path is never reused
+mkdir $dir or die "Couldn't create $dir: $!\n";
+
+#my @a = grep { /^--weights$/ } @ARGV;
+#die "Please specify a weights file with --weights" unless scalar @a;
+
+open(my $score_fh, "<", $kbest) or die "Couldn't read $kbest: $!";
+my @lines = <$score_fh>;    #the whole k-best list, one input line per element
+close $score_fh;
+my %sent_id=();             #sentence ids for which a split file was started
+my $cdir = `pwd`;           #current working directory; still ends with pwd's newline
+
+my %ids=();                 #job ids recorded by create_qsub
+
+my $fh;                     #handle of the split file currently being written
+my $file_count=0;           #number of the split file currently being written
+
+#temp directory to store the generated qsub job scripts
+my $fn = File::Temp::tempnam("/tmp", "mbr-");
+mkdir $fn or die "Couldn't create $fn: $!";
+
+#split the kbest list into per sentence files: a line whose first space
+#separated field (the sentence id) is new starts file $dir/kbest.mbr.N,
+#and the qsub job for a file is submitted as soon as the file is closed
+die "$kbest contains no lines\n" unless @lines;
+foreach my $line (@lines)
+{
+    my @parts = split(/ /, $line);
+    my $sentid = $parts[0];
+    unless ($sent_id{$sentid})
+    {
+        if (defined $fh)
+        {
+            $fh->close or die "Couldn't write $dir/kbest.mbr.$file_count: $!";
+            #create qsub entry
+            create_qsub("$file_count");
+            $file_count++;
+        }
+        $fh = FileHandle->new(">$dir/kbest.mbr.$file_count")
+            or die "Couldn't create $dir/kbest.mbr.$file_count: $!";
+        $sent_id{$sentid}++;
+    }
+    #NOTE(review): lines of an id seen earlier go to whichever file is open,
+    #so the input is presumably grouped by sentence id - confirm
+    print $fh $line;
+}
+
+$fh->close or die "Couldn't write $dir/kbest.mbr.$file_count: $!";
+#create last qsub entry
+create_qsub("$file_count");
+$file_count++;
+
+
+sleep 1;
+print STDERR "Waiting...\n";
+#poll qstat, sleeping 5 seconds before each call; $flag stays set while any
+#line of its output starts (up to the first '.') with an id recorded in %ids
+#NOTE(review): a qstat that prints nothing, e.g. on failure, ends the wait
+my $flag;
+do {
+    sleep 5;
+    my @queue = split /\n/, `qstat`;
+    $flag = undef;
+    for my $entry (@queue) {
+        $flag = 1 if $ids{ (split /\./, $entry)[0] };
+    }
+} while ($flag);
+
+#consolidate mbr output into one kbest file: the per sentence results
+#$dir/N.mbr-txt are printed one after another in file number order
+for my $i (0 .. $file_count - 1) {
+    my $result = "$dir/$i.mbr-txt";
+    open(my $in, "<", $result) or die "Couldn't read $result: $!";
+    print while <$in>;
+    close $in;
+}
+
+#`rm -rf $fn`;
+
+
+#Write job script $fn/<n>.mbr for split file number <n> (the only argument)
+#and submit it with qsub; the job runs $MBR on $dir/kbest.mbr.<n> from $cdir.
+#The part of qsub's output before the first '.' is recorded as a key of %ids.
+sub create_qsub {
+    my $file_n = shift;
+    my $sfn = "$fn/$file_n.mbr";
+    chomp(my $wd = $cdir);    #$cdir may still end with the newline from pwd
+    open(my $job, ">", $sfn) or die "Couldn't create $sfn: $!";
+    print $job "cd $wd\n";
+    print $job "$MBR < $dir/kbest.mbr.$file_n @ARGV -L > $dir/$file_n.mbr-txt\n";
+    close $job or die "Couldn't write $sfn: $!";
+    my $o = `qsub -q batch -l pmem=1000mb,walltime=06:00:00 $sfn -k n -e /dev/null`;
+    die "qsub failed for $sfn\n" if $? != 0 or $o !~ /\S/;    #no job to wait for
+    my ($x) = split /\./, $o;
+    $ids{$x}=1;
+}