From 671c21451542e2dd20e45b4033d44d8e8735f87b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 3 Dec 2009 16:33:55 -0500 Subject: initial check in --- training/cluster-em.pl | 110 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100755 training/cluster-em.pl (limited to 'training/cluster-em.pl') diff --git a/training/cluster-em.pl b/training/cluster-em.pl new file mode 100755 index 00000000..175870da --- /dev/null +++ b/training/cluster-em.pl @@ -0,0 +1,110 @@ +#!/usr/bin/perl -w + +use strict; +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } +use Getopt::Long; +my $parallel = 1; + +my $CWD=`pwd`; chomp $CWD; +my $BIN_DIR = "/chomes/redpony/cdyer-svn-repo/cdec/src"; +my $OPTIMIZER = "$BIN_DIR/mr_em_train"; +my $DECODER = "$BIN_DIR/cdec"; +my $COMBINER_CACHE_SIZE = 150; +my $PARALLEL = "/chomes/redpony/svn-trunk/sa-utils/parallelize.pl"; +die "Can't find $OPTIMIZER" unless -f $OPTIMIZER; +die "Can't execute $OPTIMIZER" unless -x $OPTIMIZER; +die "Can't find $DECODER" unless -f $DECODER; +die "Can't execute $DECODER" unless -x $DECODER; +die "Can't find $PARALLEL" unless -f $PARALLEL; +die "Can't execute $PARALLEL" unless -x $PARALLEL; +my $restart = ''; +if ($ARGV[0] && $ARGV[0] eq '--restart') { shift @ARGV; $restart = 1; } + +die "Usage: $0 [--restart] training.corpus weights.init grammar.file [grammar2.file] ...\n" unless (scalar @ARGV >= 3); + +my $training_corpus = shift @ARGV; +my $initial_weights = shift @ARGV; +my @in_grammar_files = @ARGV; +my $pmem="2500mb"; +my $nodes = 40; +my $max_iteration = 1000; +my $CFLAG = "-C 1"; +unless ($parallel) { $CFLAG = "-C 500"; } +my @grammar_files; +for my $g (@in_grammar_files) { + unless ($g =~ /^\//) { $g = $CWD . '/' . $g; } + die "Can't find $g" unless -f $g; + push @grammar_files, $g; +} + +print STDERR <