#!/usr/bin/perl -w
#
# Setup portion of a training driver script.  It records the directory the
# script lives in, verifies that the external programs it depends on exist
# and are executable, parses the command line, and turns the grammar file
# arguments into absolute paths.
#
# Usage:
#   <script> [--restart] training.corpus weights.init grammar.file [grammar2.file] ...
#
# Dies (with the messages below) when a required program or grammar file is
# missing, or when fewer than three positional arguments are given.
use strict;
use warnings;

my $SCRIPT_DIR;
BEGIN {
    use Cwd qw/ abs_path /;
    use File::Basename;

    # Make modules that sit next to this script loadable via @INC.
    $SCRIPT_DIR = dirname(abs_path($0));
    push @INC, $SCRIPT_DIR;
}
use Getopt::Long;

my $parallel = 1;

# The working directory is taken from the shell's `pwd`, as in the original.
# NOTE(review): presumably the shell's (logical) path is wanted here rather
# than Cwd::getcwd's physical one -- confirm before changing.
my $CWD = `pwd`;
die "Can't determine the current directory (pwd returned nothing)\n"
    unless defined $CWD && $CWD =~ /\S/;
chomp $CWD;

# Hard-coded locations of the external programs this script drives.
my $BIN_DIR             = "/chomes/redpony/cdyer-svn-repo/cdec/src";
my $OPTIMIZER           = "$BIN_DIR/mr_em_train";
my $DECODER             = "$BIN_DIR/cdec";
my $COMBINER_CACHE_SIZE = 150;
my $PARALLEL            = "/chomes/redpony/svn-trunk/sa-utils/parallelize.pl";

# Each program must be a plain file and executable; fail early otherwise.
for my $tool ($OPTIMIZER, $DECODER, $PARALLEL) {
    die "Can't find $tool"    unless -f $tool;
    die "Can't execute $tool" unless -x $tool;
}

# --restart is only recognised as the very first argument.
my $restart = '';
if ($ARGV[0] && $ARGV[0] eq '--restart') {
    shift @ARGV;
    $restart = 1;
}

die "Usage: $0 [--restart] training.corpus weights.init grammar.file [grammar2.file] ...\n"
    unless (scalar @ARGV >= 3);

my $training_corpus  = shift @ARGV;
my $initial_weights  = shift @ARGV;
my @in_grammar_files = @ARGV;    # everything left over is a grammar file

my $pmem          = "2500mb";
my $nodes         = 40;
my $max_iteration = 1000;

# "-C 1" when running in parallel mode, "-C 500" otherwise.
my $CFLAG = $parallel ? "-C 1" : "-C 500";

# Build the list of grammar files as absolute paths, checking each exists.
my @grammar_files;
for my $g (@in_grammar_files) {
    # $g aliases the array element, so @in_grammar_files is rewritten to
    # absolute paths as well (same as the original loop).
    $g = $CWD . '/' . $g unless $g =~ m{^/};
    die "Can't find $g" unless -f $g;
    push @grammar_files, $g;
}

# NOTE(review): the text available for review stops in the middle of the next
# statement (apparently the start of a here-document written to STDERR).  The
# fragment is preserved verbatim below; restore the complete statement from
# the original script.
#   print STDERR <