summaryrefslogtreecommitdiff
path: root/gi/pipeline
diff options
context:
space:
mode:
authorredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-02 19:23:08 +0000
committerredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-02 19:23:08 +0000
commit36b0eac74f5d8f8674659826a72276b47d687bd6 (patch)
treeaf55f23790cb50810637af56e3de8fcdb6db7868 /gi/pipeline
parent0f0ffedff50f371128a1305e475dafdc19aaa26c (diff)
filter and score in a single file
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@118 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pipeline')
-rwxr-xr-xgi/pipeline/filter-for-test-set.pl68
-rwxr-xr-xgi/pipeline/local-gi-pipeline.pl6
2 files changed, 2 insertions, 72 deletions
diff --git a/gi/pipeline/filter-for-test-set.pl b/gi/pipeline/filter-for-test-set.pl
deleted file mode 100755
index 1747c603..00000000
--- a/gi/pipeline/filter-for-test-set.pl
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
-
-my $GZIP = 'gzip';
-my $ZCAT = 'gunzip -c';
-
-my $EXTOOLS = "$SCRIPT_DIR/../../extools";
-die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
-
-my $FILTER = "$EXTOOLS/filter_grammar";
-my $SCORE = "$EXTOOLS/score_grammar";
-
-assert_exec($FILTER, $SCORE);
-
-usage() unless scalar @ARGV == 3;
-my $corpus = $ARGV[0];
-my $grammar = $ARGV[1];
-my $testset = $ARGV[2];
-die "Can't find corpus: $corpus" unless -f $corpus;
-die "Can't find corpus: $grammar" unless -f $grammar;
-die "Can't find corpus: $testset" unless -f $testset;
-print STDERR " CORPUS: $corpus\n";
-print STDERR " GRAMMAR: $corpus\n";
-print STDERR "TEST SET: $corpus\n";
-print STDERR "Extracting...\n";
-
-safesystem("$ZCAT $grammar | $FILTER $testset | $SCORE -c $corpus") or die "Failed";
-
-sub usage {
- print <<EOT;
-
-Usage: $0 corpus.src_trg_al grammar.gz test-set.txt > filtered-grammar.scfg.txt
-
-Filter and score a grammar for a test set.
-
-EOT
- exit 1;
-};
-
-sub assert_exec {
- my @files = @_;
- for my $file (@files) {
- die "Can't find $file - did you run make?\n" unless -e $file;
- die "Can't execute $file" unless -e $file;
- }
-};
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "ERROR: Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
-
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index e66ca602..acd6b94c 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -28,11 +28,9 @@ my $PYP_TOPICS_TRAIN="$PYPTOOLS/pyp-contexts-train";
my $SORT_KEYS = "$SCRIPT_DIR/scripts/sort-by-key.sh";
my $EXTRACTOR = "$EXTOOLS/extractor";
-my $FILTER = "$EXTOOLS/filter_grammar";
-my $SCORER = "$EXTOOLS/score_grammar";
my $TOPIC_TRAIN = "$PYPTOOLS/pyp-contexts-train";
-assert_exec($SORT_KEYS, $REDUCER, $EXTRACTOR, $FILTER, $SCORER, $PYP_TOPICS_TRAIN, $S2L, $C2D, $TOPIC_TRAIN);
+assert_exec($SORT_KEYS, $REDUCER, $EXTRACTOR, $PYP_TOPICS_TRAIN, $S2L, $C2D, $TOPIC_TRAIN);
my $OUTPUT = './giwork';
@@ -67,7 +65,7 @@ if ($BIDIR) {
$res = grammar_extract();
}
print STDERR "\n!!!COMPLETE!!!\n";
-print STDERR "GRAMMAR: $res\n\nYou should probably run:\n\n $SCRIPT_DIR/filter-for-test-set.pl $CORPUS $res TESTSET.TXT > filtered-grammar.scfg\n\n";
+print STDERR "GRAMMAR: $res\n\nYou should probably run:\n\n zcat $res | $SCRIPT_DIR/../../extools/filter_score_grammar -c $CORPUS -t TESTSET.TXT > filtered-grammar.scfg\n\n";
exit 0;
sub context_dir {