summaryrefslogtreecommitdiff
path: root/word-aligner/support/merge_corpus.pl
diff options
context:
space:
mode:
author redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 05:12:27 +0000
committer redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 05:12:27 +0000
commit 0172721855098ca02b207231a654dffa5e4eb1c9 (patch)
tree 8069c3a62e2d72bd64a2cdeee9724b2679c8a56b /word-aligner/support/merge_corpus.pl
parent 37728b8be4d0b3df9da81fdda2198ff55b4b2d91 (diff)
initial checkin
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@2 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'word-aligner/support/merge_corpus.pl')
-rwxr-xr-x word-aligner/support/merge_corpus.pl 18
1 files changed, 18 insertions, 0 deletions
diff --git a/word-aligner/support/merge_corpus.pl b/word-aligner/support/merge_corpus.pl
new file mode 100755
index 00000000..02827903
--- /dev/null
+++ b/word-aligner/support/merge_corpus.pl
@@ -0,0 +1,18 @@
+#!/usr/bin/perl -w
+# Joins two line-aligned corpus files into "left ||| right" lines on STDOUT.
+# Dies on a bad argument count, an unreadable file, or differing line counts.
+use strict;
+die "Usage: $0 corpus.e|f corpus.f|e" unless scalar @ARGV == 2;
+
+my ($left_path, $right_path) = @ARGV;
+# Three-argument open: a file name can never be interpreted as an open mode.
+open my $left_fh, '<', $left_path or die "Can't read $left_path: $!";
+open my $right_fh, '<', $right_path or die "Can't read $right_path: $!";
+
+while (my $left = <$left_fh>) {
+  chomp $left;  # the right-hand line keeps its newline and ends the output line
+  my $right = <$right_fh>;
+  die "Mismatched lines in $left_path and $right_path!" unless defined $right;
+  print "$left ||| $right";
+}
+die "Mismatched lines in $left_path and $right_path!" unless eof($right_fh);