diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-02-10 21:41:57 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-02-10 21:41:57 -0500 |
commit | 2b772ed8c1dcfecbb473f63cb0ef65b1dfb574dd (patch) | |
tree | 2fcf27048b54211f050bab4a4b5aa2ab0922d0c7 /compound-split | |
parent | 31b5d03c75b5d07afb54251e39fcf3e610d16141 (diff) |
transition away from checking in big data files
Diffstat (limited to 'compound-split')
-rwxr-xr-x | compound-split/compound-split.pl | 29 | ||||
-rwxr-xr-x | compound-split/install-data-deps.sh | 9 |
2 files changed, 38 insertions, 0 deletions
```diff
diff --git a/compound-split/compound-split.pl b/compound-split/compound-split.pl
index 62259146..93ac3b20 100755
--- a/compound-split/compound-split.pl
+++ b/compound-split/compound-split.pl
@@ -35,6 +35,7 @@ die "Don't know about language: $LANG\n" unless -d "./$LANG";
 my $CONFIG="cdec-$LANG.ini";
 die "Can't find $CONFIG" unless -f $CONFIG;
 die "--output must be '1best' or 'plf'\n" unless ($OUTPUT =~ /^(plf|1best)$/);
+check_dependencies($CONFIG, $LANG);
 print STDERR "(Run with --help for options)\n";
 print STDERR "LANGUAGE: $LANG\n";
 print STDERR " OUTPUT: $OUTPUT\n";
@@ -146,3 +147,31 @@ Usage: $0 [OPTIONS] < file.txt
 EOT
   exit(1);
 }
+
+sub check_dependencies {
+  my ($conf, $lang) = @_;
+  my @files = ();
+  open F, "<$conf" or die "Can't read $conf: $!";
+  while(<F>){
+    chomp;
+    my @x = split /\s+/;
+    for my $f (@x) {
+      push @files, $f if ($f =~ /\.gz$/);
+    }
+  }
+  close F;
+  my $c = 0;
+  for my $file (@files) {
+    $c++ if -f $file;
+  }
+  if ($c != scalar @files) {
+    print STDERR <<EOT;
+Missing data dependencies; to install, please run:
+
+  $script_dir/install-data-deps.sh
+
+EOT
+    exit(1);
+  }
+}
+
diff --git a/compound-split/install-data-deps.sh b/compound-split/install-data-deps.sh
new file mode 100755
index 00000000..942bfdcd
--- /dev/null
+++ b/compound-split/install-data-deps.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+set -e
+
+data_version=csplit-data-01.tar.gz
+
+curl -f http://demo.clab.cs.cmu.edu/cdec/$data_version -o $data_version
+
+tar xzf $data_version
+
```