summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-22 19:05:54 +0000
committer trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-22 19:05:54 +0000
commit 0732fff632e792ae2268e9ef1c9c230624098eb7 (patch)
tree 344784ef25043f34bf6d9fe74b2579ade072a890
parent d6cdcf776b2f4541a7ee80c1a489d4a0fee41be3 (diff)
Added option to apply tags to source-side
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@367 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x gi/pipeline/evaluation-pipeline.pl 2
-rwxr-xr-x gi/pipeline/local-gi-pipeline.pl 4
-rwxr-xr-x gi/pipeline/scripts/patch-corpus.pl 20
-rwxr-xr-x gi/posterior-regularisation/prjava/train-PR-cluster.sh 2
4 files changed, 21 insertions, 7 deletions
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index 16b9a571..b2656985 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -143,7 +143,7 @@ for my $feat (@features) {
if (!defined $rs) { die "DON'T KNOW ABOUT FEATURE $feat\n"; }
my @xfs = @$rs;
@xfeats = (@xfeats, @xfs);
- $FEATURIZER_OPTS .= " -f $feat" unless $feat=="BackoffRule";
+ $FEATURIZER_OPTS .= " -f $feat" unless $feat eq "BackoffRule";
}
print STDERR "X-FEATS: @xfeats\n";
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index 96df34ea..f72637af 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -183,7 +183,9 @@ sub setup_data {
copy($CORPUS, $CORPUS_LEX);
if ($TAGGED_CORPUS) {
die "Can't find $TAGGED_CORPUS" unless -f $TAGGED_CORPUS;
- my $cmd="$PATCH_CORPUS $TAGGED_CORPUS $CORPUS_LEX > $CORPUS_CLUSTER";
+ my $opt="";
+ $opt = "-s" if ($LANGUAGE eq "source");
+ my $cmd="$PATCH_CORPUS $opt $TAGGED_CORPUS $CORPUS_LEX > $CORPUS_CLUSTER";
safesystem($cmd) or die "Failed to extract contexts.";
} else {
symlink($LEX_NAME, $CORPUS_CLUSTER);
diff --git a/gi/pipeline/scripts/patch-corpus.pl b/gi/pipeline/scripts/patch-corpus.pl
index 2b181837..200022bc 100755
--- a/gi/pipeline/scripts/patch-corpus.pl
+++ b/gi/pipeline/scripts/patch-corpus.pl
@@ -2,7 +2,13 @@
use strict;
my $PATCH = shift @ARGV;
-die "Usage: $0 tagged.en[_fr] < lexical.en_fr_al[_...]\n" unless $PATCH;
+my $TGT = 1;
+if ($PATCH eq "-s") {
+ undef $TGT;
+ $PATCH = shift @ARGV;
+}
+
+die "Usage: $0 [-s] tagged.en[_fr] < lexical.en_fr_al[_...]\n" unless $PATCH;
open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!";
my $first=<P>; close P;
@@ -24,9 +30,15 @@ while(my $pline = <P>) {
chomp $line;
@fields = split / \|\|\| /, $line;
my @pwords = split /\s+/, $pline;
- my @lwords = split /\s+/, $fields[1];
- die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords);
- $fields[1] = $pline;
+ if ($TGT) {
+ my @lwords = split /\s+/, $fields[1];
+ die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords);
+ $fields[1] = $pline;
+ } else {
+ my @lwords = split /\s+/, $fields[0];
+ die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords);
+ $fields[0] = $pline;
+ }
print join ' ||| ', @fields;
print "\n";
}
diff --git a/gi/posterior-regularisation/prjava/train-PR-cluster.sh b/gi/posterior-regularisation/prjava/train-PR-cluster.sh
index 6c2f62cd..aa434fa8 100755
--- a/gi/posterior-regularisation/prjava/train-PR-cluster.sh
+++ b/gi/posterior-regularisation/prjava/train-PR-cluster.sh
@@ -1,4 +1,4 @@
#!/bin/sh
d=`dirname $0`
-java -ea -Xmx3g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/lib/commons-math-2.1.jar phrase.Trainer $*
+java -ea -Xmx30g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/lib/commons-math-2.1.jar phrase.Trainer $*