diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-02 19:23:08 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-02 19:23:08 +0000 |
commit | 7f772c1ade536ae1e51a1da3e8e78301646aaa0d (patch) | |
tree | 242d4720a701c45755d14b02117dc5ada81f700a /gi | |
parent | 11c4cc3836e9e0e38bc4250500381ce7723799ee (diff) |
filter and score in a single file
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@118 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi')
-rwxr-xr-x | gi/pipeline/filter-for-test-set.pl | 68 | ||||
-rwxr-xr-x | gi/pipeline/local-gi-pipeline.pl | 6 |
2 files changed, 2 insertions, 72 deletions
```diff
diff --git a/gi/pipeline/filter-for-test-set.pl b/gi/pipeline/filter-for-test-set.pl
deleted file mode 100755
index 1747c603..00000000
--- a/gi/pipeline/filter-for-test-set.pl
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
-
-my $GZIP = 'gzip';
-my $ZCAT = 'gunzip -c';
-
-my $EXTOOLS = "$SCRIPT_DIR/../../extools";
-die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
-
-my $FILTER = "$EXTOOLS/filter_grammar";
-my $SCORE = "$EXTOOLS/score_grammar";
-
-assert_exec($FILTER, $SCORE);
-
-usage() unless scalar @ARGV == 3;
-my $corpus = $ARGV[0];
-my $grammar = $ARGV[1];
-my $testset = $ARGV[2];
-die "Can't find corpus: $corpus" unless -f $corpus;
-die "Can't find corpus: $grammar" unless -f $grammar;
-die "Can't find corpus: $testset" unless -f $testset;
-print STDERR " CORPUS: $corpus\n";
-print STDERR " GRAMMAR: $corpus\n";
-print STDERR "TEST SET: $corpus\n";
-print STDERR "Extracting...\n";
-
-safesystem("$ZCAT $grammar | $FILTER $testset | $SCORE -c $corpus") or die "Failed";
-
-sub usage {
-  print <<EOT;
-
-Usage: $0 corpus.src_trg_al grammar.gz test-set.txt > filtered-grammar.scfg.txt
-
-Filter and score a grammar for a test set.
-
-EOT
-  exit 1;
-};
-
-sub assert_exec {
-  my @files = @_;
-  for my $file (@files) {
-    die "Can't find $file - did you run make?\n" unless -e $file;
-    die "Can't execute $file" unless -e $file;
-  }
-};
-
-sub safesystem {
-  print STDERR "Executing: @_\n";
-  system(@_);
-  if ($? == -1) {
-      print STDERR "ERROR: Failed to execute: @_\n $!\n";
-      exit(1);
-  }
-  elsif ($? & 127) {
-      printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
-          ($? & 127), ($? & 128) ? 'with' : 'without';
-      exit(1);
-  }
-  else {
-    my $exitcode = $? >> 8;
-    print STDERR "Exit code: $exitcode\n" if $exitcode;
-    return ! $exitcode;
-  }
-}
-
-
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index e66ca602..acd6b94c 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -28,11 +28,9 @@ my $PYP_TOPICS_TRAIN="$PYPTOOLS/pyp-contexts-train";
 
 my $SORT_KEYS = "$SCRIPT_DIR/scripts/sort-by-key.sh";
 my $EXTRACTOR = "$EXTOOLS/extractor";
-my $FILTER = "$EXTOOLS/filter_grammar";
-my $SCORER = "$EXTOOLS/score_grammar";
 my $TOPIC_TRAIN = "$PYPTOOLS/pyp-contexts-train";
 
-assert_exec($SORT_KEYS, $REDUCER, $EXTRACTOR, $FILTER, $SCORER, $PYP_TOPICS_TRAIN, $S2L, $C2D, $TOPIC_TRAIN);
+assert_exec($SORT_KEYS, $REDUCER, $EXTRACTOR, $PYP_TOPICS_TRAIN, $S2L, $C2D, $TOPIC_TRAIN);
 
 my $OUTPUT = './giwork';
 
@@ -67,7 +65,7 @@ if ($BIDIR) {
   $res = grammar_extract();
 }
 print STDERR "\n!!!COMPLETE!!!\n";
-print STDERR "GRAMMAR: $res\n\nYou should probably run:\n\n $SCRIPT_DIR/filter-for-test-set.pl $CORPUS $res TESTSET.TXT > filtered-grammar.scfg\n\n";
+print STDERR "GRAMMAR: $res\n\nYou should probably run:\n\n zcat $res | $SCRIPT_DIR/../../extools/filter_score_grammar -c $CORPUS -t TESTSET.TXT > filtered-grammar.scfg\n\n";
 exit 0;
 
 sub context_dir {
```