From 0964b959fc2be173a2adbd52d2d1d143f7458496 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 10 Feb 2014 21:41:57 -0500 Subject: transition away from checking in big data files --- compound-split/compound-split.pl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'compound-split/compound-split.pl') diff --git a/compound-split/compound-split.pl b/compound-split/compound-split.pl index 62259146..93ac3b20 100755 --- a/compound-split/compound-split.pl +++ b/compound-split/compound-split.pl @@ -35,6 +35,7 @@ die "Don't know about language: $LANG\n" unless -d "./$LANG"; my $CONFIG="cdec-$LANG.ini"; die "Can't find $CONFIG" unless -f $CONFIG; die "--output must be '1best' or 'plf'\n" unless ($OUTPUT =~ /^(plf|1best)$/); +check_dependencies($CONFIG, $LANG); print STDERR "(Run with --help for options)\n"; print STDERR "LANGUAGE: $LANG\n"; print STDERR " OUTPUT: $OUTPUT\n"; @@ -146,3 +147,31 @@ Usage: $0 [OPTIONS] < file.txt EOT exit(1); } + +sub check_dependencies { + my ($conf, $lang) = @_; + my @files = (); + open F, "<$conf" or die "Can't read $conf: $!"; + while(){ + chomp; + my @x = split /\s+/; + for my $f (@x) { + push @files, $f if ($f =~ /\.gz$/); + } + } + close F; + my $c = 0; + for my $file (@files) { + $c++ if -f $file; + } + if ($c != scalar @files) { + print STDERR <