summary refs log tree commit diff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-02-10 21:41:57 -0500
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-02-10 21:41:57 -0500
commit 2b772ed8c1dcfecbb473f63cb0ef65b1dfb574dd (patch)
tree 2fcf27048b54211f050bab4a4b5aa2ab0922d0c7
parent 31b5d03c75b5d07afb54251e39fcf3e610d16141 (diff)
transition away from checking in big data files
-rwxr-xr-x compound-split/compound-split.pl 29
-rwxr-xr-x compound-split/install-data-deps.sh 9
2 files changed, 38 insertions, 0 deletions
diff --git a/compound-split/compound-split.pl b/compound-split/compound-split.pl
index 62259146..93ac3b20 100755
--- a/compound-split/compound-split.pl
+++ b/compound-split/compound-split.pl
@@ -35,6 +35,7 @@ die "Don't know about language: $LANG\n" unless -d "./$LANG";
my $CONFIG="cdec-$LANG.ini";
die "Can't find $CONFIG" unless -f $CONFIG;
die "--output must be '1best' or 'plf'\n" unless ($OUTPUT =~ /^(plf|1best)$/);
+check_dependencies($CONFIG, $LANG);
print STDERR "(Run with --help for options)\n";
print STDERR "LANGUAGE: $LANG\n";
print STDERR " OUTPUT: $OUTPUT\n";
@@ -146,3 +147,31 @@ Usage: $0 [OPTIONS] < file.txt
EOT
exit(1);
}
+
+# Verify that every data file named in the decoder config is present.
+# Scans $conf for whitespace-separated tokens ending in ".gz" and tests each
+# one with -f (relative paths resolve against the current directory). If any
+# is missing, prints install instructions to STDERR and exits with status 1.
+# $lang is accepted from the caller but is not used here.
+sub check_dependencies {
+  my ($conf, $lang) = @_;
+  # Three-arg open with a lexical handle: the path is taken literally and no
+  # global bareword handle (or the caller's $_) is clobbered while reading.
+  open my $fh, '<', $conf or die "Can't read $conf: $!";
+  my @files;
+  while (my $line = <$fh>) {
+    chomp $line;
+    push @files, grep { /\.gz$/ } split /\s+/, $line;
+  }
+  close $fh;
+  my @missing = grep { !-f $_ } @files;
+  return unless @missing;
+  print STDERR <<EOT;
+Missing data dependencies; to install, please run:
+
+ $script_dir/install-data-deps.sh
+
+EOT
+  exit(1);
+}
+
diff --git a/compound-split/install-data-deps.sh b/compound-split/install-data-deps.sh
new file mode 100755
index 00000000..942bfdcd
--- /dev/null
+++ b/compound-split/install-data-deps.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# Download the compound-splitter data archive and unpack it beside this script.
+set -e
+data_version=csplit-data-01.tar.gz
+# Presumably the data must sit next to the scripts; don't rely on caller's cwd.
+cd "$(dirname "$0")"
+curl -f "http://demo.clab.cs.cmu.edu/cdec/$data_version" -o "$data_version"
+tar xzf "$data_version"
+