diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-24 04:05:17 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-24 04:05:17 +0000 |
commit | 840d1b84a977b46180350bdb965a118150f238d9 (patch) | |
tree | 333f8d69ecd4456491bf985a5bbc26716b2ab56f /gi/pipeline | |
parent | 5ed01d87524dc4471e4fe601e528b2753f0038b6 (diff) |
pipeline scripts
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@17 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pipeline')
-rwxr-xr-x | gi/pipeline/filter-for-test-set.pl | 73 | ||||
-rwxr-xr-x | gi/pipeline/local-gi-pipeline.pl | 2 | ||||
-rwxr-xr-x | gi/pipeline/scripts/sort-by-key.sh (renamed from gi/pipeline/sort-by-key.sh) | 0 |
3 files changed, 74 insertions, 1 deletions
diff --git a/gi/pipeline/filter-for-test-set.pl b/gi/pipeline/filter-for-test-set.pl
new file mode 100755
index 00000000..61edaf67
--- /dev/null
+++ b/gi/pipeline/filter-for-test-set.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl -w
+use strict;
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
+
+# Filter a gzipped grammar for a test set and score the result against a
+# corpus by running the shell pipeline
+#   gunzip -c GRAMMAR | filter_grammar TESTSET | score_grammar CORPUS
+# The pipeline inherits this script's STDOUT; diagnostics go to STDERR.
+
+use Getopt::Long "GetOptions";
+use IPC::Run3;
+use File::Temp qw ( tempdir );
+my $TEMP_DIR = tempdir( CLEANUP => 1 );
+
+my $GZIP = 'gzip';
+my $ZCAT = 'gunzip -c';
+
+# The extools directory is located relative to this script.
+my $EXTOOLS = "$SCRIPT_DIR/../../extools";
+die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
+
+my $FILTER = "$EXTOOLS/filter_grammar";
+my $SCORE = "$EXTOOLS/score_grammar";
+
+assert_exec($FILTER, $SCORE);
+
+usage() unless scalar @ARGV == 3;
+my ($corpus, $grammar, $testset) = @ARGV;
+die "Can't find corpus: $corpus" unless -f $corpus;
+die "Can't find grammar: $grammar" unless -f $grammar;
+die "Can't find test set: $testset" unless -f $testset;
+print STDERR " CORPUS: $corpus\n";
+print STDERR " GRAMMAR: $grammar\n";
+print STDERR "TEST SET: $testset\n";
+print STDERR "Extracting...\n";
+
+# Quote every path before it is interpolated into the shell command so that
+# names containing spaces or shell metacharacters reach the tools intact.
+# $ZCAT is a command plus its option and is deliberately left unquoted.
+my ($q_filter, $q_score, $q_corpus, $q_grammar, $q_testset) =
+  map { shell_quote($_) } ($FILTER, $SCORE, $corpus, $grammar, $testset);
+# NOTE: the status seen by safesystem is the shell's, i.e. that of the last
+# command in the pipeline (score_grammar).
+safesystem("$ZCAT $q_grammar | $q_filter $q_testset | $q_score $q_corpus") or die "Failed";
+
+# Print the usage message to STDERR and exit with status 1.  STDERR is used
+# because STDOUT is where the filtered grammar is written.
+sub usage {
+  print STDERR <<EOT;
+
+Usage: $0 corpus.src_trg_al grammar.gz test-set.txt > filtered-grammar.scfg.txt
+
+Filter and score a grammar for a test set.
+
+EOT
+  exit 1;
+};
+
+# Die unless every path in the argument list exists and is executable.
+sub assert_exec {
+  my @files = @_;
+  for my $file (@files) {
+    die "Can't find $file - did you run make?\n" unless -e $file;
+    die "Can't execute $file" unless -x $file;
+  }
+};
+
+# Return the argument wrapped in single quotes for the shell, with embedded
+# single quotes escaped.
+sub shell_quote {
+  my ($text) = @_;
+  $text =~ s/'/'\\''/g;
+  return "'$text'";
+}
+
+# Run a command with system(), echoing it to STDERR first.
+# Returns true when the exit code is 0 and false otherwise (a non-zero exit
+# code is reported on STDERR).  Exits the script with status 1 if the command
+# could not be started or was killed by a signal.
+sub safesystem {
+  print STDERR "Executing: @_\n";
+  system(@_);
+  if ($? == -1) {
+    print STDERR "ERROR: Failed to execute: @_\n $!\n";
+    exit(1);
+  }
+  elsif ($? & 127) {
+    # The command goes through %s so that a '%' in it is not taken as a
+    # printf conversion.
+    printf STDERR "ERROR: Execution of: %s\n died with signal %d, %s coredump\n",
+      "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
+    exit(1);
+  }
+  else {
+    my $exitcode = $? >> 8;
+    print STDERR "Exit code: $exitcode\n" if $exitcode;
+    return ! $exitcode;
+  }
+}
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index e52ad4ec..4707d5a3 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -28,7 +28,7 @@ my $S2L = "$PYPSCRIPTS/spans2labels.py";
 
 my $PYP_TOPICS_TRAIN="$PYPTOOLS/pyp-topics-train";
 
-my $SORT_KEYS = "$SCRIPT_DIR/sort-by-key.sh";
+my $SORT_KEYS = "$SCRIPT_DIR/scripts/sort-by-key.sh";
 my $EXTRACTOR = "$EXTOOLS/extractor";
 my $FILTER = "$EXTOOLS/filter_grammar";
 my $SCORER = "$EXTOOLS/score_grammar";
diff --git a/gi/pipeline/sort-by-key.sh b/gi/pipeline/scripts/sort-by-key.sh
index 948dd4df..948dd4df 100755
--- a/gi/pipeline/sort-by-key.sh
+++ b/gi/pipeline/scripts/sort-by-key.sh