summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2009-12-04 20:32:11 -0500
committerChris Dyer <redpony@gmail.com>2009-12-04 20:32:11 -0500
commit8780a924a060ced3468b71a088409d5ef83fc985 (patch)
tree0ee244184aa61b39f3bf0cea8922f0d4f97ed364
parent5361a6412834485dfd268b6a0fa4144d636f425f (diff)
oops, forgot to add tests
-rw-r--r--tests/IPC/Run3.pm850
-rw-r--r--tests/README11
-rwxr-xr-xtests/run-system-tests.pl121
-rw-r--r--tests/system_tests/australia-align/australia.scfg.gzbin0 -> 564058 bytes
-rw-r--r--tests/system_tests/australia-align/cdec.ini3
-rw-r--r--tests/system_tests/australia-align/gold.statistics8
-rw-r--r--tests/system_tests/australia-align/gold.stdout1
-rw-r--r--tests/system_tests/australia-align/input.txt1
-rw-r--r--tests/system_tests/australia-align/stderr48
-rw-r--r--tests/system_tests/australia-align/weights7
-rw-r--r--tests/system_tests/australia/australia.scfg.gzbin0 -> 564058 bytes
-rw-r--r--tests/system_tests/australia/cdec.ini2
-rw-r--r--tests/system_tests/australia/gold.statistics8
-rw-r--r--tests/system_tests/australia/gold.stdout0
-rw-r--r--tests/system_tests/australia/input.txt1
-rw-r--r--tests/system_tests/australia/stderr22
-rw-r--r--tests/system_tests/australia/weights7
-rw-r--r--tests/system_tests/controlled_synparse/cdec.ini3
-rw-r--r--tests/system_tests/controlled_synparse/gold.statistics16
-rw-r--r--tests/system_tests/controlled_synparse/gold.stdout36
-rw-r--r--tests/system_tests/controlled_synparse/input.txt2
-rw-r--r--tests/system_tests/controlled_synparse/scfg.biparse.gzbin0 -> 141 bytes
-rw-r--r--tests/system_tests/ftrans/cdec.ini3
-rw-r--r--tests/system_tests/ftrans/ftrans.pt4
-rw-r--r--tests/system_tests/ftrans/gold.statistics5
-rw-r--r--tests/system_tests/ftrans/gold.stdout4
-rw-r--r--tests/system_tests/ftrans/input.cfg5
-rw-r--r--tests/system_tests/ftrans/input.txt1
-rw-r--r--tests/system_tests/ftrans/weights5
-rw-r--r--tests/system_tests/unsup-align/cdec.ini6
-rw-r--r--tests/system_tests/unsup-align/gold.statistics96
-rw-r--r--tests/system_tests/unsup-align/gold.stdout8
-rw-r--r--tests/system_tests/unsup-align/input.txt8
-rw-r--r--tests/system_tests/unsup-align/unsup-align.lex-grammar17
-rw-r--r--tests/system_tests/unsup-align/weights19
-rwxr-xr-xtests/tools/compare-statistics.pl49
-rwxr-xr-xtests/tools/filter-stderr.pl30
37 files changed, 1407 insertions, 0 deletions
diff --git a/tests/IPC/Run3.pm b/tests/IPC/Run3.pm
new file mode 100644
index 00000000..00875511
--- /dev/null
+++ b/tests/IPC/Run3.pm
@@ -0,0 +1,850 @@
+package IPC::Run3;
+BEGIN { require 5.006_000; } # i.e. 5.6.0
+use strict;
+
+=head1 NAME
+
+IPC::Run3 - run a subprocess with input/ouput redirection
+
+=head1 VERSION
+
+version 0.043
+
+=cut
+
+our $VERSION = '0.043';
+
+=head1 SYNOPSIS
+
+ use IPC::Run3; # Exports run3() by default
+
+ run3 \@cmd, \$in, \$out, \$err;
+
+=head1 DESCRIPTION
+
+This module allows you to run a subprocess and redirect stdin, stdout,
+and/or stderr to files and perl data structures. It aims to satisfy 99% of the
+need for using C<system>, C<qx>, and C<open3>
+with a simple, extremely Perlish API.
+
+Speed, simplicity, and portability are paramount. (That's speed of Perl code;
+which is often much slower than the kind of buffered I/O that this module uses
+to spool input to and output from the child command.)
+
+=cut
+
+use Exporter;
+our @ISA = qw(Exporter);
+our @EXPORT = qw( run3 );
+our %EXPORT_TAGS = ( all => \@EXPORT );
+
+use constant debugging => $ENV{IPCRUN3DEBUG} || $ENV{IPCRUNDEBUG} || 0;
+use constant profiling => $ENV{IPCRUN3PROFILE} || $ENV{IPCRUNPROFILE} || 0;
+use constant is_win32 => 0 <= index $^O, "Win32";
+
+BEGIN {
+ if ( is_win32 ) {
+ eval "use Win32 qw( GetOSName ); 1" or die $@;
+ }
+}
+
+#use constant is_win2k => is_win32 && GetOSName() =~ /Win2000/i;
+#use constant is_winXP => is_win32 && GetOSName() =~ /WinXP/i;
+
+use Carp qw( croak );
+use File::Temp qw( tempfile );
+use POSIX qw( dup dup2 );
+
+# We cache the handles of our temp files in order to
+# keep from having to incur the (largish) overhead of File::Temp
+my %fh_cache;
+my $fh_cache_pid = $$;
+
+my $profiler;
+
+sub _profiler { $profiler } # test suite access
+
+BEGIN {
+ if ( profiling ) {
+ eval "use Time::HiRes qw( gettimeofday ); 1" or die $@;
+ if ( $ENV{IPCRUN3PROFILE} =~ /\A\d+\z/ ) {
+ require IPC::Run3::ProfPP;
+ IPC::Run3::ProfPP->import;
+ $profiler = IPC::Run3::ProfPP->new(Level => $ENV{IPCRUN3PROFILE});
+ } else {
+ my ( $dest, undef, $class ) =
+ reverse split /(=)/, $ENV{IPCRUN3PROFILE}, 2;
+ $class = "IPC::Run3::ProfLogger"
+ unless defined $class && length $class;
+ if ( not eval "require $class" ) {
+ my $e = $@;
+ $class = "IPC::Run3::$class";
+ eval "require IPC::Run3::$class" or die $e;
+ }
+ $profiler = $class->new( Destination => $dest );
+ }
+ $profiler->app_call( [ $0, @ARGV ], scalar gettimeofday() );
+ }
+}
+
+
+END {
+ $profiler->app_exit( scalar gettimeofday() ) if profiling;
+}
+
+sub _binmode {
+ my ( $fh, $mode, $what ) = @_;
+ # if $mode is not given, then default to ":raw", except on Windows,
+ # where we default to ":crlf";
+ # otherwise if a proper layer string was given, use that,
+ # else use ":raw"
+ my $layer = !$mode
+ ? (is_win32 ? ":crlf" : ":raw")
+ : ($mode =~ /^:/ ? $mode : ":raw");
+ warn "binmode $what, $layer\n" if debugging >= 2;
+
+ binmode $fh, ":raw" unless $layer eq ":raw"; # remove all layers first
+ binmode $fh, $layer or croak "binmode $layer failed: $!";
+}
+
+sub _spool_data_to_child {
+ my ( $type, $source, $binmode_it ) = @_;
+
+ # If undef (not \undef) passed, they want the child to inherit
+ # the parent's STDIN.
+ return undef unless defined $source;
+
+ my $fh;
+ if ( ! $type ) {
+ open $fh, "<", $source or croak "$!: $source";
+ _binmode($fh, $binmode_it, "STDIN");
+ warn "run3(): feeding file '$source' to child STDIN\n"
+ if debugging >= 2;
+ } elsif ( $type eq "FH" ) {
+ $fh = $source;
+ warn "run3(): feeding filehandle '$source' to child STDIN\n"
+ if debugging >= 2;
+ } else {
+ $fh = $fh_cache{in} ||= tempfile;
+ truncate $fh, 0;
+ seek $fh, 0, 0;
+ _binmode($fh, $binmode_it, "STDIN");
+ my $seekit;
+ if ( $type eq "SCALAR" ) {
+
+ # When the run3()'s caller asks to feed an empty file
+ # to the child's stdin, we want to pass a live file
+ # descriptor to an empty file (like /dev/null) so that
+ # they don't get surprised by invalid fd errors and get
+ # normal EOF behaviors.
+ return $fh unless defined $$source; # \undef passed
+
+ warn "run3(): feeding SCALAR to child STDIN",
+ debugging >= 3
+ ? ( ": '", $$source, "' (", length $$source, " chars)" )
+ : (),
+ "\n"
+ if debugging >= 2;
+
+ $seekit = length $$source;
+ print $fh $$source or die "$! writing to temp file";
+
+ } elsif ( $type eq "ARRAY" ) {
+ warn "run3(): feeding ARRAY to child STDIN",
+ debugging >= 3 ? ( ": '", @$source, "'" ) : (),
+ "\n"
+ if debugging >= 2;
+
+ print $fh @$source or die "$! writing to temp file";
+ $seekit = grep length, @$source;
+ } elsif ( $type eq "CODE" ) {
+ warn "run3(): feeding output of CODE ref '$source' to child STDIN\n"
+ if debugging >= 2;
+ my $parms = []; # TODO: get these from $options
+ while (1) {
+ my $data = $source->( @$parms );
+ last unless defined $data;
+ print $fh $data or die "$! writing to temp file";
+ $seekit = length $data;
+ }
+ }
+
+ seek $fh, 0, 0 or croak "$! seeking on temp file for child's stdin"
+ if $seekit;
+ }
+
+ croak "run3() can't redirect $type to child stdin"
+ unless defined $fh;
+
+ return $fh;
+}
+
+sub _fh_for_child_output {
+ my ( $what, $type, $dest, $options ) = @_;
+
+ my $fh;
+ if ( $type eq "SCALAR" && $dest == \undef ) {
+ warn "run3(): redirecting child $what to oblivion\n"
+ if debugging >= 2;
+
+ $fh = $fh_cache{nul} ||= do {
+ open $fh, ">", File::Spec->devnull;
+ $fh;
+ };
+ } elsif ( $type eq "FH" ) {
+ $fh = $dest;
+ warn "run3(): redirecting $what to filehandle '$dest'\n"
+ if debugging >= 3;
+ } elsif ( !$type ) {
+ warn "run3(): feeding child $what to file '$dest'\n"
+ if debugging >= 2;
+
+ open $fh, $options->{"append_$what"} ? ">>" : ">", $dest
+ or croak "$!: $dest";
+ } else {
+ warn "run3(): capturing child $what\n"
+ if debugging >= 2;
+
+ $fh = $fh_cache{$what} ||= tempfile;
+ seek $fh, 0, 0;
+ truncate $fh, 0;
+ }
+
+ my $binmode_it = $options->{"binmode_$what"};
+ _binmode($fh, $binmode_it, uc $what);
+
+ return $fh;
+}
+
+sub _read_child_output_fh {
+ my ( $what, $type, $dest, $fh, $options ) = @_;
+
+ return if $type eq "SCALAR" && $dest == \undef;
+
+ seek $fh, 0, 0 or croak "$! seeking on temp file for child $what";
+
+ if ( $type eq "SCALAR" ) {
+ warn "run3(): reading child $what to SCALAR\n"
+ if debugging >= 3;
+
+ # two read()s are used instead of 1 so that the first will be
+ # logged even it reads 0 bytes; the second won't.
+ my $count = read $fh, $$dest, 10_000,
+ $options->{"append_$what"} ? length $$dest : 0;
+ while (1) {
+ croak "$! reading child $what from temp file"
+ unless defined $count;
+
+ last unless $count;
+
+ warn "run3(): read $count bytes from child $what",
+ debugging >= 3 ? ( ": '", substr( $$dest, -$count ), "'" ) : (),
+ "\n"
+ if debugging >= 2;
+
+ $count = read $fh, $$dest, 10_000, length $$dest;
+ }
+ } elsif ( $type eq "ARRAY" ) {
+ if ($options->{"append_$what"}) {
+ push @$dest, <$fh>;
+ } else {
+ @$dest = <$fh>;
+ }
+ if ( debugging >= 2 ) {
+ my $count = 0;
+ $count += length for @$dest;
+ warn
+ "run3(): read ",
+ scalar @$dest,
+ " records, $count bytes from child $what",
+ debugging >= 3 ? ( ": '", @$dest, "'" ) : (),
+ "\n";
+ }
+ } elsif ( $type eq "CODE" ) {
+ warn "run3(): capturing child $what to CODE ref\n"
+ if debugging >= 3;
+
+ local $_;
+ while ( <$fh> ) {
+ warn
+ "run3(): read ",
+ length,
+ " bytes from child $what",
+ debugging >= 3 ? ( ": '", $_, "'" ) : (),
+ "\n"
+ if debugging >= 2;
+
+ $dest->( $_ );
+ }
+ } else {
+ croak "run3() can't redirect child $what to a $type";
+ }
+
+}
+
+sub _type {
+ my ( $redir ) = @_;
+ return "FH" if eval { $redir->isa("IO::Handle") };
+ my $type = ref $redir;
+ return $type eq "GLOB" ? "FH" : $type;
+}
+
+sub _max_fd {
+ my $fd = dup(0);
+ POSIX::close $fd;
+ return $fd;
+}
+
+my $run_call_time;
+my $sys_call_time;
+my $sys_exit_time;
+
+sub run3 {
+ $run_call_time = gettimeofday() if profiling;
+
+ my $options = @_ && ref $_[-1] eq "HASH" ? pop : {};
+
+ my ( $cmd, $stdin, $stdout, $stderr ) = @_;
+
+ print STDERR "run3(): running ",
+ join( " ", map "'$_'", ref $cmd ? @$cmd : $cmd ),
+ "\n"
+ if debugging;
+
+ if ( ref $cmd ) {
+ croak "run3(): empty command" unless @$cmd;
+ croak "run3(): undefined command" unless defined $cmd->[0];
+ croak "run3(): command name ('')" unless length $cmd->[0];
+ } else {
+ croak "run3(): missing command" unless @_;
+ croak "run3(): undefined command" unless defined $cmd;
+ croak "run3(): command ('')" unless length $cmd;
+ }
+
+ foreach (qw/binmode_stdin binmode_stdout binmode_stderr/) {
+ if (my $mode = $options->{$_}) {
+ croak qq[option $_ must be a number or a proper layer string: "$mode"]
+ unless $mode =~ /^(:|\d+$)/;
+ }
+ }
+
+ my $in_type = _type $stdin;
+ my $out_type = _type $stdout;
+ my $err_type = _type $stderr;
+
+ if ($fh_cache_pid != $$) {
+ # fork detected, close all cached filehandles and clear the cache
+ close $_ foreach values %fh_cache;
+ %fh_cache = ();
+ $fh_cache_pid = $$;
+ }
+
+ # This routine procedes in stages so that a failure in an early
+ # stage prevents later stages from running, and thus from needing
+ # cleanup.
+
+ my $in_fh = _spool_data_to_child $in_type, $stdin,
+ $options->{binmode_stdin} if defined $stdin;
+
+ my $out_fh = _fh_for_child_output "stdout", $out_type, $stdout,
+ $options if defined $stdout;
+
+ my $tie_err_to_out =
+ defined $stderr && defined $stdout && $stderr eq $stdout;
+
+ my $err_fh = $tie_err_to_out
+ ? $out_fh
+ : _fh_for_child_output "stderr", $err_type, $stderr,
+ $options if defined $stderr;
+
+ # this should make perl close these on exceptions
+# local *STDIN_SAVE;
+ local *STDOUT_SAVE;
+ local *STDERR_SAVE;
+
+ my $saved_fd0 = dup( 0 ) if defined $in_fh;
+
+# open STDIN_SAVE, "<&STDIN"# or croak "run3(): $! saving STDIN"
+# if defined $in_fh;
+ open STDOUT_SAVE, ">&STDOUT" or croak "run3(): $! saving STDOUT"
+ if defined $out_fh;
+ open STDERR_SAVE, ">&STDERR" or croak "run3(): $! saving STDERR"
+ if defined $err_fh;
+
+ my $errno;
+ my $ok = eval {
+ # The open() call here seems to not force fd 0 in some cases;
+ # I ran in to trouble when using this in VCP, not sure why.
+ # the dup2() seems to work.
+ dup2( fileno $in_fh, 0 )
+# open STDIN, "<&=" . fileno $in_fh
+ or croak "run3(): $! redirecting STDIN"
+ if defined $in_fh;
+
+# close $in_fh or croak "$! closing STDIN temp file"
+# if ref $stdin;
+
+ open STDOUT, ">&" . fileno $out_fh
+ or croak "run3(): $! redirecting STDOUT"
+ if defined $out_fh;
+
+ open STDERR, ">&" . fileno $err_fh
+ or croak "run3(): $! redirecting STDERR"
+ if defined $err_fh;
+
+ $sys_call_time = gettimeofday() if profiling;
+
+ my $r = ref $cmd
+ ? system { $cmd->[0] }
+ is_win32
+ ? map {
+ # Probably need to offer a win32 escaping
+ # option, every command may be different.
+ ( my $s = $_ ) =~ s/"/"""/g;
+ $s = qq{"$s"};
+ $s;
+ } @$cmd
+ : @$cmd
+ : system $cmd;
+
+ $errno = $!; # save $!, because later failures will overwrite it
+ $sys_exit_time = gettimeofday() if profiling;
+ if ( debugging ) {
+ my $err_fh = defined $err_fh ? \*STDERR_SAVE : \*STDERR;
+ if ( defined $r && $r != -1 ) {
+ print $err_fh "run3(): \$? is $?\n";
+ } else {
+ print $err_fh "run3(): \$? is $?, \$! is $errno\n";
+ }
+ }
+
+ die $! if defined $r && $r == -1 && !$options->{return_if_system_error};
+
+ 1;
+ };
+ my $x = $@;
+
+ my @errs;
+
+ if ( defined $saved_fd0 ) {
+ dup2( $saved_fd0, 0 );
+ POSIX::close( $saved_fd0 );
+ }
+
+# open STDIN, "<&STDIN_SAVE"# or push @errs, "run3(): $! restoring STDIN"
+# if defined $in_fh;
+ open STDOUT, ">&STDOUT_SAVE" or push @errs, "run3(): $! restoring STDOUT"
+ if defined $out_fh;
+ open STDERR, ">&STDERR_SAVE" or push @errs, "run3(): $! restoring STDERR"
+ if defined $err_fh;
+
+ croak join ", ", @errs if @errs;
+
+ die $x unless $ok;
+
+ _read_child_output_fh "stdout", $out_type, $stdout, $out_fh, $options
+ if defined $out_fh && $out_type && $out_type ne "FH";
+ _read_child_output_fh "stderr", $err_type, $stderr, $err_fh, $options
+ if defined $err_fh && $err_type && $err_type ne "FH" && !$tie_err_to_out;
+ $profiler->run_exit(
+ $cmd,
+ $run_call_time,
+ $sys_call_time,
+ $sys_exit_time,
+ scalar gettimeofday()
+ ) if profiling;
+
+ $! = $errno; # restore $! from system()
+
+ return 1;
+}
+
+1;
+
+__END__
+
+=head2 C<< run3($cmd, $stdin, $stdout, $stderr, \%options) >>
+
+All parameters after C<$cmd> are optional.
+
+The parameters C<$stdin>, C<$stdout> and C<$stderr> indicate
+how the child's corresponding filehandle
+(C<STDIN>, C<STDOUT> and C<STDERR>, resp.) will be redirected.
+Because the redirects come last, this allows C<STDOUT> and C<STDERR> to default
+to the parent's by just not specifying them -- a common use case.
+
+C<run3> throws an exception if the wrapped C<system> call returned -1
+or anything went wrong with C<run3>'s processing of filehandles.
+Otherwise it returns true.
+It leaves C<$?> intact for inspection of exit and wait status.
+
+Note that a true return value from C<run3> doesn't mean that the command
+had a successful exit code. Hence you should always check C<$?>.
+
+See L</%options> for an option to handle the case of C<system>
+returning -1 yourself.
+
+=head3 C<$cmd>
+
+Usually C<$cmd> will be an ARRAY reference and the child is invoked via
+
+ system @$cmd;
+
+But C<$cmd> may also be a string in which case the child is invoked via
+
+ system $cmd;
+
+(cf. L<perlfunc/system> for the difference and the pitfalls of using
+the latter form).
+
+=head3 C<$stdin>, C<$stdout>, C<$stderr>
+
+The parameters C<$stdin>, C<$stdout> and C<$stderr>
+can take one of the following forms:
+
+=over 4
+
+=item C<undef> (or not specified at all)
+
+The child inherits the corresponding filehandle from the parent.
+
+ run3 \@cmd, $stdin; # child writes to same STDOUT and STDERR as parent
+ run3 \@cmd, undef, $stdout, $stderr; # child reads from same STDIN as parent
+
+=item C<\undef>
+
+The child's filehandle is redirected from or to the
+local equivalent of C</dev/null> (as returned by
+C<< File::Spec->devnull() >>).
+
+ run3 \@cmd, \undef, $stdout, $stderr; # child reads from /dev/null
+
+=item a simple scalar
+
+The parameter is taken to be the name of a file to read from
+or write to. In the latter case, the file will be opened via
+
+ open FH, ">", ...
+
+i.e. it is created if it doesn't exist and truncated otherwise.
+Note that the file is opened by the parent which will L<croak|Carp/croak>
+in case of failure.
+
+ run3 \@cmd, \undef, "out.txt"; # child writes to file "out.txt"
+
+=item a filehandle (either a reference to a GLOB or an C<IO::Handle>)
+
+The filehandle is inherited by the child.
+
+ open my $fh, ">", "out.txt";
+ print $fh "prologue\n";
+ ...
+ run3 \@cmd, \undef, $fh; # child writes to $fh
+ ...
+ print $fh "epilogue\n";
+ close $fh;
+
+=item a SCALAR reference
+
+The referenced scalar is treated as a string to be read from or
+written to. In the latter case, the previous content of the string
+is overwritten.
+
+ my $out;
+ run3 \@cmd, \undef, \$out; # child writes into string
+ run3 \@cmd, \<<EOF; # child reads from string (can use "here" notation)
+ Input
+ to
+ child
+ EOF
+
+=item an ARRAY reference
+
+For C<$stdin>, the elements of C<@$stdin> are simply spooled to the child.
+
+For C<$stdout> or C<$stderr>, the child's corresponding file descriptor
+is read line by line (as determined by the current setting of C<$/>)
+into C<@$stdout> or C<@$stderr>, resp. The previous content of the array
+is overwritten.
+
+ my @lines;
+ run3 \@cmd, \undef, \@lines; # child writes into array
+
+=item a CODE reference
+
+For C<$stdin>, C<&$stdin> will be called repeatedly (with no arguments) and
+the return values are spooled to the child. C<&$stdin> must signal the end of
+input by returning C<undef>.
+
+For C<$stdout> or C<$stderr>, the child's corresponding file descriptor
+is read line by line (as determined by the current setting of C<$/>)
+and C<&$stdout> or C<&$stderr>, resp., is called with the contents of the line.
+Note that there's no end-of-file indication.
+
+ my $i = 0;
+ sub producer {
+ return $i < 10 ? "line".$i++."\n" : undef;
+ }
+
+ run3 \@cmd, \&producer; # child reads 10 lines
+
+Note that this form of redirecting the child's I/O doesn't imply
+any form of concurrency between parent and child - run3()'s method of
+operation is the same no matter which form of redirection you specify.
+
+=back
+
+If the same value is passed for C<$stdout> and C<$stderr>, then the child
+will write both C<STDOUT> and C<STDERR> to the same filehandle.
+In general, this means that
+
+ run3 \@cmd, \undef, "foo.txt", "foo.txt";
+ run3 \@cmd, \undef, \$both, \$both;
+
+will DWIM and pass a single file handle to the child for both C<STDOUT> and
+C<STDERR>, collecting all into file "foo.txt" or C<$both>.
+
+=head3 C<\%options>
+
+The last parameter, C<\%options>, must be a hash reference if present.
+
+Currently the following
+keys are supported:
+
+=over 4
+
+=item C<binmode_stdin>, C<binmode_stdout>, C<binmode_stderr>
+
+The value must a "layer" as described in L<perlfunc/binmode>.
+If specified the corresponding
+parameter C<$stdin>, C<$stdout> or C<$stderr>, resp., operates
+with the given layer.
+
+For backward compatibility, a true value that doesn't start with ":"
+(e.g. a number) is interpreted as ":raw". If the value is false
+or not specified, the default is ":crlf" on Windows and ":raw" otherwise.
+
+Don't expect that values other than the built-in layers ":raw", ":crlf",
+and (on newer Perls) ":bytes", ":utf8", ":encoding(...)" will work.
+
+=item C<append_stdout>, C<append_stderr>
+
+If their value is true then the corresponding
+parameter C<$stdout> or C<$stderr>, resp., will append the child's output
+to the existing "contents" of the redirector. This only makes
+sense if the redirector is a simple scalar (the corresponding file
+is opened in append mode), a SCALAR reference (the output is
+appended to the previous contents of the string)
+or an ARRAY reference (the output is C<push>ed onto the
+previous contents of the array).
+
+=item C<return_if_system_error>
+
+If this is true C<run3> does B<not> throw an exception if C<system>
+returns -1 (cf. L<perlfunc/system> for possible
+failure scenarios.), but returns true instead.
+In this case C<$?> has the value -1 and C<$!>
+contains the errno of the failing C<system> call.
+
+=back
+
+=head1 HOW IT WORKS
+
+=over 4
+
+=item (1)
+
+For each redirector C<$stdin>, C<$stdout>, and C<$stderr>,
+C<run3()> furnishes a filehandle:
+
+=over 4
+
+=item *
+
+if the redirector already specifies a filehandle it just uses that
+
+=item *
+
+if the redirector specifies a filename, C<run3()> opens the file
+in the appropriate mode
+
+=item *
+
+in all other cases, C<run3()> opens a temporary file
+(using L<tempfile|Temp/tempfile>)
+
+=back
+
+=item (2)
+
+If C<run3()> opened a temporary file for C<$stdin> in step (1),
+it writes the data using the specified method (either
+from a string, an array or returnd by a function) to the temporary file and rewinds it.
+
+=item (3)
+
+C<run3()> saves the parent's C<STDIN>, C<STDOUT> and C<STDERR> by duplicating
+them to new filehandles. It duplicates the filehandles from step (1)
+to C<STDIN>, C<STDOUT> and C<STDERR>, resp.
+
+=item (4)
+
+C<run3()> runs the child by invoking L<system|perlfunc/system>
+with C<$cmd> as specified above.
+
+=item (5)
+
+C<run3()> restores the parent's C<STDIN>, C<STDOUT> and C<STDERR> saved in step (3).
+
+=item (6)
+
+If C<run3()> opened a temporary file for C<$stdout> or C<$stderr> in step (1),
+it rewinds it and reads back its contents using the specified method
+(either to a string, an array or by calling a function).
+
+=item (7)
+
+C<run3()> closes all filehandles that it opened explicitly in step (1).
+
+=back
+
+Note that when using temporary files, C<run3()> tries to amortize the overhead
+by reusing them (i.e. it keeps them open and rewinds and truncates them
+before the next operation).
+
+=head1 LIMITATIONS
+
+Often uses intermediate files (determined by File::Temp, and thus by the
+File::Spec defaults and the TMPDIR env. variable) for speed, portability and
+simplicity.
+
+Use extrem caution when using C<run3> in a threaded environment if
+concurrent calls of C<run3> are possible. Most likely, I/O from different
+invocations will get mixed up. The reason is that in most thread
+implementations all threads in a process share the same STDIN/STDOUT/STDERR.
+Known failures are Perl ithreads on Linux and Win32. Note that C<fork>
+on Win32 is emulated via Win32 threads and hence I/O mix up is possible
+between forked children here (C<run3> is "fork safe" on Unix, though).
+
+=head1 DEBUGGING
+
+To enable debugging use the IPCRUN3DEBUG environment variable to
+a non-zero integer value:
+
+ $ IPCRUN3DEBUG=1 myapp
+
+=head1 PROFILING
+
+To enable profiling, set IPCRUN3PROFILE to a number to enable emitting profile
+information to STDERR (1 to get timestamps, 2 to get a summary report at the
+END of the program, 3 to get mini reports after each run) or to a filename to
+emit raw data to a file for later analysis.
+
+=head1 COMPARISON
+
+Here's how it stacks up to existing APIs:
+
+=head2 compared to C<system()>, C<qx''>, C<open "...|">, C<open "|...">
+
+=over
+
+=item +
+
+redirects more than one file descriptor
+
+=item +
+
+returns TRUE on success, FALSE on failure
+
+=item +
+
+throws an error if problems occur in the parent process (or the pre-exec child)
+
+=item +
+
+allows a very perlish interface to Perl data structures and subroutines
+
+=item +
+
+allows 1 word invocations to avoid the shell easily:
+
+ run3 ["foo"]; # does not invoke shell
+
+=item -
+
+does not return the exit code, leaves it in $?
+
+=back
+
+=head2 compared to C<open2()>, C<open3()>
+
+=over
+
+=item +
+
+no lengthy, error prone polling/select loop needed
+
+=item +
+
+hides OS dependancies
+
+=item +
+
+allows SCALAR, ARRAY, and CODE references to source and sink I/O
+
+=item +
+
+I/O parameter order is like C<open3()> (not like C<open2()>).
+
+=item -
+
+does not allow interaction with the subprocess
+
+=back
+
+=head2 compared to L<IPC::Run::run()|IPC::Run/run>
+
+=over
+
+=item +
+
+smaller, lower overhead, simpler, more portable
+
+=item +
+
+no select() loop portability issues
+
+=item +
+
+does not fall prey to Perl closure leaks
+
+=item -
+
+does not allow interaction with the subprocess (which
+IPC::Run::run() allows by redirecting subroutines)
+
+=item -
+
+lacks many features of C<IPC::Run::run()> (filters, pipes,
+redirects, pty support)
+
+=back
+
+=head1 COPYRIGHT
+
+Copyright 2003, R. Barrie Slaymaker, Jr., All Rights Reserved
+
+=head1 LICENSE
+
+You may use this module under the terms of the BSD, Artistic, or GPL licenses,
+any version.
+
+=head1 AUTHOR
+
+Barrie Slaymaker E<lt>C<barries@slaysys.com>E<gt>
+
+Ricardo SIGNES E<lt>C<rjbs@cpan.org>E<gt> performed some routine maintenance in
+2005, thanks to help from the following ticket and/or patch submitters: Jody
+Belka, Roderich Schupp, David Morel, and anonymous others.
+
+=cut
diff --git a/tests/README b/tests/README
new file mode 100644
index 00000000..d6b7cf51
--- /dev/null
+++ b/tests/README
@@ -0,0 +1,11 @@
+IPC::Run3 code is included, which is copyright 2003 by R. Barrie Slaymaker and
+makes the disclaimer:
+
+You may use this module under the terms of the BSD, Artistic, or GPL licenses,
+any version.
+
+For more information, refer to:
+ http://search.cpan.org/~rjbs/IPC-Run3-0.043/lib/IPC/Run3.pm
+
+The rest of the code and data is subject to the cdec BSD license.
+
diff --git a/tests/run-system-tests.pl b/tests/run-system-tests.pl
new file mode 100755
index 00000000..738000dc
--- /dev/null
+++ b/tests/run-system-tests.pl
@@ -0,0 +1,121 @@
+#!/usr/bin/perl -w
+use strict;
+my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
+
+use IPC::Run3;
+use File::Temp qw ( tempdir );
+my $TEMP_DIR = tempdir( CLEANUP => 1 );
+
+#my $cwd = cwd();
+#die "Sanity failed: $cwd" unless -d $cwd;
+my $DECODER = "$script_dir/../src/cdec";
+my $FILTER = "$script_dir/tools/filter-stderr.pl";
+my $COMPARE_STATS = "$script_dir/tools/compare-statistics.pl";
+
+die "Can't find $DECODER" unless -f $DECODER;
+die "Can't execute $DECODER" unless -x $DECODER;
+die "Can't find $FILTER" unless -f $FILTER;
+die "Can't execute $FILTER" unless -x $FILTER;
+die "Can't find $COMPARE_STATS" unless -f $COMPARE_STATS;
+die "Can't execute $COMPARE_STATS" unless -x $COMPARE_STATS;
+
+my $TEST_DIR = "$script_dir/system_tests";
+opendir DIR, $TEST_DIR or die "Can't open $TEST_DIR: $!";
+#my @test_dirs = grep { /^\./ && −d "$some_dir/$_" } readdir(DIR);
+my @tests = grep { !/^\./ && -d "$TEST_DIR/$_" } readdir(DIR);
+closedir DIR;
+
+print STDERR " DECODER: $DECODER\n";
+print STDERR " TESTS: @tests\n";
+print STDERR "TEMP DIR: $TEMP_DIR\n";
+
+my $FAIL = 0;
+my $PASS = 0;
+for my $test (@tests) {
+ print "TEST: $test\n";
+ chdir "$TEST_DIR/$test" or die "Can't chdir to $TEST_DIR/$test: $!";
+ my $CMD = "$DECODER";
+ unless (-f 'gold.statistics') {
+ print " missing gold.statistics -- SKIPPING\n";
+ $FAIL++;
+ next;
+ }
+ unless (-f 'gold.stdout') {
+ print " missing gold.stdout -- SKIPPING\n";
+ $FAIL++;
+ next;
+ }
+ if (-f 'cdec.ini') {
+ $CMD .= ' -c cdec.ini';
+ }
+ if (-f 'weights') {
+ $CMD .= ' -w weights';
+ }
+ if (-f 'input.txt') {
+ $CMD .= ' -i input.txt';
+ }
+
+ run3 $CMD, \undef, "$TEMP_DIR/stdout", "$TEMP_DIR/stderr";
+ if ($? != 0) {
+ print STDERR " non-zero exit! command: $CMD\n";
+ $FAIL++;
+ } else {
+ die unless -f "$TEMP_DIR/stdout";
+ my $failed = 0;
+ run3 "diff gold.stdout $TEMP_DIR/stdout";
+ if ($? != 0) {
+ print STDERR " FAILED differences in output!\n";
+ $failed = 1;
+ }
+ die unless -f "$TEMP_DIR/stderr";
+ run3 "$FILTER", "$TEMP_DIR/stderr", "$TEMP_DIR/test.statistics";
+ if ($? != 0) {
+ print STDERR " non-zero exit: $FILTER\n";
+ $FAIL++;
+ next;
+ }
+ my @lines;
+ run3 "$COMPARE_STATS gold.statistics", "$TEMP_DIR/test.statistics", \@lines;
+ if (scalar @lines != 1) {
+ print STDERR " unexpected output: @lines\n";
+ $FAIL++;
+ next;
+ }
+ my $l = $lines[0]; chomp $l;
+ if ($l =~ /^(\d+) (\d+)$/) {
+ my $passes = $1;
+ my $total = $2;
+ my $pct = $passes * 100 / $total;
+ $pct = sprintf "%.2f", $pct;
+
+ if ($total == $passes) {
+ if ($failed) {
+ print " (decoder statistics match, though)\n";
+ } else {
+ print " PASSED\n";
+ }
+ } else {
+ if ($failed) {
+ print " ($pct of decoder search statistics match)\n";
+ } else {
+ print " FAILED $pct of decoder search statistics match\n";
+ }
+ }
+ } else {
+ $failed = 1;
+ print STDERR " bad format: $l\n";
+ }
+ if ($failed) { $FAIL++; } else { $PASS++; }
+ }
+}
+
+my $TOT = $PASS + $FAIL;
+print "\nSUMMARY: $PASS / $TOT TESTS PASSED\n";
+if ($FAIL != 0) {
+ print " !!! THERE WERE FAILURES - DECODER IS ACTING SUSPICIOUSLY !!!\n\n";
+ exit 1;
+} else {
+ print "\n";
+ exit 0;
+}
+
diff --git a/tests/system_tests/australia-align/australia.scfg.gz b/tests/system_tests/australia-align/australia.scfg.gz
new file mode 100644
index 00000000..0cd70f1f
--- /dev/null
+++ b/tests/system_tests/australia-align/australia.scfg.gz
Binary files differ
diff --git a/tests/system_tests/australia-align/cdec.ini b/tests/system_tests/australia-align/cdec.ini
new file mode 100644
index 00000000..eba98941
--- /dev/null
+++ b/tests/system_tests/australia-align/cdec.ini
@@ -0,0 +1,3 @@
+formalism=scfg
+grammar=australia.scfg.gz
+aligner=true
diff --git a/tests/system_tests/australia-align/gold.statistics b/tests/system_tests/australia-align/gold.statistics
new file mode 100644
index 00000000..8101bd40
--- /dev/null
+++ b/tests/system_tests/australia-align/gold.statistics
@@ -0,0 +1,8 @@
+-lm_nodes 77
+-lm_edges 244232
+-lm_paths 3.79555e+28
+-lm_trans australia is have diplomatic relations with north korea one of the few countries .
+-lm_viterbi -12.7893
+constr_nodes 111
+constr_edges 305
+constr_paths 9899
diff --git a/tests/system_tests/australia-align/gold.stdout b/tests/system_tests/australia-align/gold.stdout
new file mode 100644
index 00000000..310ac348
--- /dev/null
+++ b/tests/system_tests/australia-align/gold.stdout
@@ -0,0 +1 @@
+0-0 1-1 2-11 3-12 3-13 4-7 4-8 5-9 5-10 6-4 7-5 8-6 9-2 9-3 10-14
diff --git a/tests/system_tests/australia-align/input.txt b/tests/system_tests/australia-align/input.txt
new file mode 100644
index 00000000..482d3e4b
--- /dev/null
+++ b/tests/system_tests/australia-align/input.txt
@@ -0,0 +1 @@
+澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。 ||| australia is one of the few countries that has diplomatic relations with north korea .
diff --git a/tests/system_tests/australia-align/stderr b/tests/system_tests/australia-align/stderr
new file mode 100644
index 00000000..c49e3bd0
--- /dev/null
+++ b/tests/system_tests/australia-align/stderr
@@ -0,0 +1,48 @@
+cdec v1.0 (c) 2009 by Chris Dyer
+Configuration file: cdec.ini
+Reading SCFG grammar from australia.scfg.gz
+ 33737 rules read.
+Reading weights from weights
+Loaded 7 feature weights
+Reading input from input.txt
+
+INPUT: 澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。 ||| australia is one of the few ...
+ id = 0
+ Goal category: [S]
+ ...........
+ -LM forest (nodes/edges): 77/244232
+ -LM forest (paths): 3.79555e+28
+ -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries .
+ -LM Viterbi: -12.7893
+ Goal category: [CAT_76]
+ ...............
+ Constr. forest (nodes/edges): 111/305
+ Constr. forest (paths): 9899
+ Constr. VitTree: (CAT_76 (CAT_75 (CAT_73 (CAT_13 (CAT_12 australia is)) (CAT_66 (CAT_55 (CAT_40 one of the few countries) (CAT_5 that has) diplomatic relations) with north korea)) (CAT_11 .)))
+ 1 0.367526 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0.367526 0.999979 0.100222 0.0997905 0.0210297 0 0 0.00481735 8.02707e-12 0 0 0 0 0 0.0587194
+ 0 9.42288e-12 0 8.53331e-07 3.77376e-06 0 0 8.02707e-12 0.0310404 0 0 0.999994 0.947965 0.947965 9.71274e-07
+ 0 0 0 0 0 0 0 0 0 0 0 0.947965 1 1 0
+ 0 0 7.54036e-06 6.96573e-05 0 0 0 0.742293 0.999244 0 0 0.0310422 0 0 0.000677908
+ 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0
+ 0 0.02397 0.328274 0.330722 0.961649 0.321117 0.263254 0.257688 0.000755969 0 0 3.82289e-06 0 0 0.0265549
+ 0 0 0.130232 0.130232 0.321285 1 0.565141 0 0 0 0 0 0 0 0
+ 0 0 0.108129 0.10919 0.260861 0.565141 1 0 0 0 0 0 0 0 2.53515e-12
+ 0 0.100047 0.999815 0.996401 0.365844 0.130232 0.108128 0 0 0 0 0 0 0 0.112835
+ 0 0.058724 0.112814 0.112464 0.0214041 0 0 0.00590826 0 0 0 0 0 0 1
+
+ 012345678901234
+0*..............0
+1.*.............1
+2...........*...2
+3............**.3
+4.......**......4
+5.........**....5
+6....*..........6
+7.....*.........7
+8......*........8
+9..**...........9
+0..............*0
+ 012345678901234
+
+Translation: 1.41218 secs (1 calls)
diff --git a/tests/system_tests/australia-align/weights b/tests/system_tests/australia-align/weights
new file mode 100644
index 00000000..a280184c
--- /dev/null
+++ b/tests/system_tests/australia-align/weights
@@ -0,0 +1,7 @@
+WordPenalty -2.844814
+LanguageModel 1.0
+PhraseModel_0 -1.066893
+PhraseModel_1 -0.752247
+PhraseModel_2 -0.589793
+PassThrough -20.0
+Glue 0
diff --git a/tests/system_tests/australia/australia.scfg.gz b/tests/system_tests/australia/australia.scfg.gz
new file mode 100644
index 00000000..0cd70f1f
--- /dev/null
+++ b/tests/system_tests/australia/australia.scfg.gz
Binary files differ
diff --git a/tests/system_tests/australia/cdec.ini b/tests/system_tests/australia/cdec.ini
new file mode 100644
index 00000000..dfc3cb20
--- /dev/null
+++ b/tests/system_tests/australia/cdec.ini
@@ -0,0 +1,2 @@
+formalism=scfg
+grammar=australia.scfg.gz
diff --git a/tests/system_tests/australia/gold.statistics b/tests/system_tests/australia/gold.statistics
new file mode 100644
index 00000000..050d12dd
--- /dev/null
+++ b/tests/system_tests/australia/gold.statistics
@@ -0,0 +1,8 @@
+-lm_nodes 77
+-lm_edges 244232
+-lm_paths 3.79555e+28
+-lm_trans australia is have diplomatic relations with north korea one of the few countries .
+-lm_viterbi -12.7893
+constr_nodes 185
+constr_edges 827
+constr_paths 279358
diff --git a/tests/system_tests/australia/gold.stdout b/tests/system_tests/australia/gold.stdout
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/tests/system_tests/australia/gold.stdout
diff --git a/tests/system_tests/australia/input.txt b/tests/system_tests/australia/input.txt
new file mode 100644
index 00000000..f14bf535
--- /dev/null
+++ b/tests/system_tests/australia/input.txt
@@ -0,0 +1 @@
+澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。 ||| ((('australia',0,1),),(('is',0,1),),(('one',0,1),),(('of',0,1),),(('the',0,4),('a',0,4),('a',0,1),('the',0,1),),(('small',0,1),('tiny',0,1),('miniscule',0,1),('handful',0,2),),(('number',0,1),('group',0,1),),(('of',0,2),),(('few',0,1),),(('countries',0,1),),(('that',0,1),),(('has',0,1),('have',0,1),),(('diplomatic',0,1),),(('relations',0,1),),(('with',0,1),),(('north',0,1),),(('korea',0,1),),(('.',0,1),),)
diff --git a/tests/system_tests/australia/stderr b/tests/system_tests/australia/stderr
new file mode 100644
index 00000000..c41fb9fb
--- /dev/null
+++ b/tests/system_tests/australia/stderr
@@ -0,0 +1,22 @@
+cdec v1.0 (c) 2009 by Chris Dyer
+Configuration file: cdec.ini
+Reading SCFG grammar from australia.scfg.gz
+ 33737 rules read.
+Reading weights from weights
+Loaded 7 feature weights
+Reading input from input.txt
+
+INPUT: 澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。 ||| ...
+ id = 0
+ Goal category: [S]
+ ...........
+ -LM forest (nodes/edges): 77/244232
+ -LM forest (paths): 3.79555e+28
+ -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries .
+ -LM Viterbi: -12.7893
+ Goal category: [CAT_76]
+ ..................
+ Constr. forest (nodes/edges): 185/827
+ Constr. forest (paths): 279358
+ Constr. VitTree: (CAT_76 (CAT_75 (CAT_1 (CAT_0 australia)) (CAT_74 is (CAT_31 one of (CAT_20 a few countries)) that (CAT_36 have diplomatic relations (CAT_15 with north korea)) .)))
+Translation: 2.69099 secs (1 calls)
diff --git a/tests/system_tests/australia/weights b/tests/system_tests/australia/weights
new file mode 100644
index 00000000..a280184c
--- /dev/null
+++ b/tests/system_tests/australia/weights
@@ -0,0 +1,7 @@
+WordPenalty -2.844814
+LanguageModel 1.0
+PhraseModel_0 -1.066893
+PhraseModel_1 -0.752247
+PhraseModel_2 -0.589793
+PassThrough -20.0
+Glue 0
diff --git a/tests/system_tests/controlled_synparse/cdec.ini b/tests/system_tests/controlled_synparse/cdec.ini
new file mode 100644
index 00000000..b5c7d4d8
--- /dev/null
+++ b/tests/system_tests/controlled_synparse/cdec.ini
@@ -0,0 +1,3 @@
+formalism=scfg
+grammar=scfg.biparse.gz
+k_best=100
diff --git a/tests/system_tests/controlled_synparse/gold.statistics b/tests/system_tests/controlled_synparse/gold.statistics
new file mode 100644
index 00000000..3885cf94
--- /dev/null
+++ b/tests/system_tests/controlled_synparse/gold.statistics
@@ -0,0 +1,16 @@
+-lm_nodes 11
+-lm_edges 18
+-lm_paths 18
+-lm_trans a d c b
+-lm_trans 0
+constr_nodes 8
+constr_edges 8
+constr_paths 1
+-lm_nodes 11
+-lm_edges 18
+-lm_paths 18
+-lm_trans a d c b
+-lm_trans 0
+constr_nodes 12
+constr_edges 14
+constr_paths 3
diff --git a/tests/system_tests/controlled_synparse/gold.stdout b/tests/system_tests/controlled_synparse/gold.stdout
new file mode 100644
index 00000000..f3e0ce08
--- /dev/null
+++ b/tests/system_tests/controlled_synparse/gold.stdout
@@ -0,0 +1,36 @@
+0 ||| c b c d ||| F1bad2=1;F5=1;F6=1;F7=1;Glue=3
+0 ||| c d c b e ||| F1bad2=1;F4=1;F5=1;F7=1;Glue=1
+0 ||| c b c d ||| F1bad2=1;F6=1;F7=1;F9=1;Glue=2
+0 ||| b d c b e ||| F1bad1=1;F4=1;F5=1;F7=1;Glue=1
+0 ||| c b c d ||| F1bad2=1;F3=1;F5=1;F7=1;Glue=1
+0 ||| a d c b e ||| F1=1;F4=1;F5=1;F7=1;Glue=1
+0 ||| b b c d ||| F1bad1=1;F3=1;F5=1;F7=1;Glue=1
+0 ||| c d c b ||| F1bad2=1;F2=1;F5=1;F7=1;Glue=1
+0 ||| a b c d ||| F1=1;F3=1;F5=1;F7=1;Glue=1
+0 ||| b b c d ||| F1bad1=1;F5=1;F6=1;F7=1;Glue=3
+0 ||| b d c b ||| F1bad1=1;F2=1;F5=1;F7=1;Glue=1
+0 ||| a d c b ||| F1=1;F2=1;F5=1;F7=1;Glue=1
+0 ||| b b c d ||| F1bad1=1;F6=1;F7=1;F9=1;Glue=2
+0 ||| c c b d ||| F1bad2=1;F6=1;F7=1;F8=1;Glue=2
+0 ||| a b c d ||| F1=1;F6=1;F7=1;F9=1;Glue=2
+0 ||| a b c d ||| F1=1;F5=1;F6=1;F7=1;Glue=3
+0 ||| b c b d ||| F1bad1=1;F6=1;F7=1;F8=1;Glue=2
+0 ||| a c b d ||| F1=1;F6=1;F7=1;F8=1;Glue=2
+1 ||| c b c d ||| F1bad2=1;F5=1;F6=1;F7=1;Glue=3
+1 ||| c d c b e ||| F1bad2=1;F4=1;F5=1;F7=1;Glue=1
+1 ||| c b c d ||| F1bad2=1;F6=1;F7=1;F9=1;Glue=2
+1 ||| b d c b e ||| F1bad1=1;F4=1;F5=1;F7=1;Glue=1
+1 ||| c b c d ||| F1bad2=1;F3=1;F5=1;F7=1;Glue=1
+1 ||| a d c b e ||| F1=1;F4=1;F5=1;F7=1;Glue=1
+1 ||| b b c d ||| F1bad1=1;F3=1;F5=1;F7=1;Glue=1
+1 ||| c d c b ||| F1bad2=1;F2=1;F5=1;F7=1;Glue=1
+1 ||| a b c d ||| F1=1;F3=1;F5=1;F7=1;Glue=1
+1 ||| b b c d ||| F1bad1=1;F5=1;F6=1;F7=1;Glue=3
+1 ||| b d c b ||| F1bad1=1;F2=1;F5=1;F7=1;Glue=1
+1 ||| a d c b ||| F1=1;F2=1;F5=1;F7=1;Glue=1
+1 ||| b b c d ||| F1bad1=1;F6=1;F7=1;F9=1;Glue=2
+1 ||| c c b d ||| F1bad2=1;F6=1;F7=1;F8=1;Glue=2
+1 ||| a b c d ||| F1=1;F6=1;F7=1;F9=1;Glue=2
+1 ||| a b c d ||| F1=1;F5=1;F6=1;F7=1;Glue=3
+1 ||| b c b d ||| F1bad1=1;F6=1;F7=1;F8=1;Glue=2
+1 ||| a c b d ||| F1=1;F6=1;F7=1;F8=1;Glue=2
diff --git a/tests/system_tests/controlled_synparse/input.txt b/tests/system_tests/controlled_synparse/input.txt
new file mode 100644
index 00000000..2dbc09c8
--- /dev/null
+++ b/tests/system_tests/controlled_synparse/input.txt
@@ -0,0 +1,2 @@
+A B C D ||| a d c b
+A B C D ||| a b c d
diff --git a/tests/system_tests/controlled_synparse/scfg.biparse.gz b/tests/system_tests/controlled_synparse/scfg.biparse.gz
new file mode 100644
index 00000000..aaf1c41d
--- /dev/null
+++ b/tests/system_tests/controlled_synparse/scfg.biparse.gz
Binary files differ
diff --git a/tests/system_tests/ftrans/cdec.ini b/tests/system_tests/ftrans/cdec.ini
new file mode 100644
index 00000000..e8e004d6
--- /dev/null
+++ b/tests/system_tests/ftrans/cdec.ini
@@ -0,0 +1,3 @@
+formalism=fst
+k_best=1000
+grammar=ftrans.pt
diff --git a/tests/system_tests/ftrans/ftrans.pt b/tests/system_tests/ftrans/ftrans.pt
new file mode 100644
index 00000000..7e8c6f59
--- /dev/null
+++ b/tests/system_tests/ftrans/ftrans.pt
@@ -0,0 +1,4 @@
+b ||| B ||| F1=1 OtherFeat=1
+c ||| C ||| F2=0.2
+b c ||| BC ||| F3=0.4
+c b ||| CB ||| F4=1
diff --git a/tests/system_tests/ftrans/gold.statistics b/tests/system_tests/ftrans/gold.statistics
new file mode 100644
index 00000000..40cdfd4c
--- /dev/null
+++ b/tests/system_tests/ftrans/gold.statistics
@@ -0,0 +1,5 @@
+-lm_nodes 13
+-lm_edges 16
+-lm_paths 4
+-lm_trans CB
+-lm_viterbi 101
diff --git a/tests/system_tests/ftrans/gold.stdout b/tests/system_tests/ftrans/gold.stdout
new file mode 100644
index 00000000..25c615d1
--- /dev/null
+++ b/tests/system_tests/ftrans/gold.stdout
@@ -0,0 +1,4 @@
+0 ||| CB ||| F4=1;Inv=1
+0 ||| BC ||| F3=0.4;Mono=1
+0 ||| B C ||| F1=1;OtherFeat=1;F2=0.2;Mono=1
+0 ||| C B ||| F1=1;OtherFeat=1;F2=0.2;Inv=1
diff --git a/tests/system_tests/ftrans/input.cfg b/tests/system_tests/ftrans/input.cfg
new file mode 100644
index 00000000..b602c1cb
--- /dev/null
+++ b/tests/system_tests/ftrans/input.cfg
@@ -0,0 +1,5 @@
+[S] ||| [A]
+[A] ||| [B] [C] ||| Mono=1
+[A] ||| [C] [B] ||| Inv=1
+[B] ||| b
+[C] ||| c
diff --git a/tests/system_tests/ftrans/input.txt b/tests/system_tests/ftrans/input.txt
new file mode 100644
index 00000000..aa37b2e7
--- /dev/null
+++ b/tests/system_tests/ftrans/input.txt
@@ -0,0 +1 @@
+{"rules":[1,"[B] ||| b ||| b",2,"[C] ||| c ||| c",3,"[A] ||| [B,1] [C,2] ||| [1] [2] ||| Mono=1",4,"[A] ||| [C,1] [B,2] ||| [1] [2] ||| Inv=1",5,"[S] ||| [A,1] ||| [1]"],"features":["Mono","Inv"],"edges":[{"tail":[],"feats":[],"rule":1}],"node":{"in_edges":[0],"cat":"B"},"edges":[{"tail":[],"feats":[],"rule":2}],"node":{"in_edges":[1],"cat":"C"},"edges":[{"tail":[0,1],"feats":[0,1],"rule":3},{"tail":[1,0],"feats":[1,1],"rule":4}],"node":{"in_edges":[2,3],"cat":"A"},"edges":[{"tail":[2],"feats":[],"rule":5}],"node":{"in_edges":[4],"cat":"S"}}
diff --git a/tests/system_tests/ftrans/weights b/tests/system_tests/ftrans/weights
new file mode 100644
index 00000000..89d3a6d3
--- /dev/null
+++ b/tests/system_tests/ftrans/weights
@@ -0,0 +1,5 @@
+F1 1
+F3 10
+F4 100
+Mono 2
+Inv 1
diff --git a/tests/system_tests/unsup-align/cdec.ini b/tests/system_tests/unsup-align/cdec.ini
new file mode 100644
index 00000000..4016a201
--- /dev/null
+++ b/tests/system_tests/unsup-align/cdec.ini
@@ -0,0 +1,6 @@
+aligner=true
+grammar=unsup-align.lex-grammar
+cubepruning_pop_limit=1000000
+formalism=lexcrf
+feature_function=RelativeSentencePosition
+feature_function=MarkovJump
diff --git a/tests/system_tests/unsup-align/gold.statistics b/tests/system_tests/unsup-align/gold.statistics
new file mode 100644
index 00000000..afc49bfc
--- /dev/null
+++ b/tests/system_tests/unsup-align/gold.statistics
@@ -0,0 +1,96 @@
+-lm_nodes 2
+-lm_edges 3
+-lm_paths 2
+-lm_trans blue
+-lm_trans 0.4528
++lm_nodes 2
++lm_edges 3
++lm_paths 2
++lm_trans blue
+constr_nodes 3
+constr_edges 3
+constr_paths 1
+-lm_nodes 2
+-lm_edges 4
+-lm_paths 3
+-lm_trans house
+-lm_trans 0.673643
++lm_nodes 2
++lm_edges 4
++lm_paths 3
++lm_trans house
+constr_nodes 3
+constr_edges 3
+constr_paths 1
+-lm_nodes 4
+-lm_edges 16
+-lm_paths 49
+-lm_trans the the
+-lm_viterbi 1.42559
++lm_nodes 7
++lm_edges 20
++lm_paths 49
++lm_trans the house
+constr_nodes 8
+constr_edges 11
+constr_paths 4
+-lm_nodes 4
+-lm_edges 12
+-lm_paths 25
+-lm_trans house house
+-lm_viterbi 1.34729
++lm_nodes 7
++lm_edges 16
++lm_paths 25
++lm_trans house blue
+constr_nodes 8
+constr_edges 11
+constr_paths 4
+-lm_nodes 4
+-lm_edges 14
+-lm_paths 36
+-lm_trans the the
+-lm_viterbi 1.42559
++lm_nodes 7
++lm_edges 18
++lm_paths 36
++lm_trans the the
+constr_nodes 8
+constr_edges 11
+constr_paths 4
+-lm_nodes 2
+-lm_edges 5
+-lm_paths 4
+-lm_trans the
+-lm_trans 0.712796
++lm_nodes 2
++lm_edges 5
++lm_paths 4
++lm_trans the
+constr_nodes 3
+constr_edges 3
+constr_paths 1
+-lm_nodes 4
+-lm_edges 14
+-lm_paths 36
+-lm_trans the the
+-lm_viterbi 1.42559
++lm_nodes 7
++lm_edges 18
++lm_paths 36
++lm_trans the the
+constr_nodes 8
+constr_edges 11
+constr_paths 4
+-lm_nodes 4
+-lm_edges 10
+-lm_paths 16
+-lm_trans thet thet
+-lm_trans 0
++lm_nodes 7
++lm_edges 14
++lm_paths 16
++lm_trans end thet
+constr_nodes 8
+constr_edges 11
+constr_paths 4
diff --git a/tests/system_tests/unsup-align/gold.stdout b/tests/system_tests/unsup-align/gold.stdout
new file mode 100644
index 00000000..b94ab862
--- /dev/null
+++ b/tests/system_tests/unsup-align/gold.stdout
@@ -0,0 +1,8 @@
+0-0
+0-0
+0-0 1-1
+0-1 1-0
+0-0 1-1
+0-0
+0-0 1-1
+0-0 1-1
diff --git a/tests/system_tests/unsup-align/input.txt b/tests/system_tests/unsup-align/input.txt
new file mode 100644
index 00000000..b97f81cf
--- /dev/null
+++ b/tests/system_tests/unsup-align/input.txt
@@ -0,0 +1,8 @@
+bleue ||| blue
+maison ||| house
+la maison ||| the house
+maison bleue ||| blue house
+la fleur ||| the flower
+la ||| the
+la bouche ||| the mouth
+le fin ||| thet end
diff --git a/tests/system_tests/unsup-align/unsup-align.lex-grammar b/tests/system_tests/unsup-align/unsup-align.lex-grammar
new file mode 100644
index 00000000..30b1a61a
--- /dev/null
+++ b/tests/system_tests/unsup-align/unsup-align.lex-grammar
@@ -0,0 +1,17 @@
+bleue ||| blue ||| F1000001=1
+bleue ||| house ||| F1000002=1
+bouche ||| the ||| F1000003=1
+bouche ||| mouth ||| F1000004=1
+fin ||| thet ||| F1000005=1
+fin ||| end ||| F1000006=1
+fleur ||| the ||| F1000007=1
+fleur ||| flower ||| F1000008=1
+la ||| the ||| F1000009=1
+la ||| house ||| F1000010=1
+la ||| flower ||| F1000011=1
+la ||| mouth ||| F1000012=1
+le ||| thet ||| F1000013=1
+le ||| end ||| F1000014=1
+maison ||| house ||| F1000015=1
+maison ||| the ||| F1000016=1
+maison ||| blue ||| F1000017=1
diff --git a/tests/system_tests/unsup-align/weights b/tests/system_tests/unsup-align/weights
new file mode 100644
index 00000000..7d9012c5
--- /dev/null
+++ b/tests/system_tests/unsup-align/weights
@@ -0,0 +1,19 @@
+RelativeSentencePosition -0.1
+MarkovJump -0.2
+F1000001 0.45280036748928199
+F1000002 -0.30603801277140658
+F1000003 0.0087200168696079348
+F1000004 0.25201383750998718
+F1000005 0
+F1000006 0
+F1000007 0.0087200038242073886
+F1000008 0.25201333505199081
+F1000009 0.7127956550520711
+F1000010 -0.4803381673023227
+F1000011 -0.42444387021307117
+F1000012 -0.42444295555037082
+F1000013 0
+F1000014 0
+F1000015 0.67364292245615709
+F1000016 -0.38087265233441997
+F1000017 -0.34457059973906817
diff --git a/tests/tools/compare-statistics.pl b/tests/tools/compare-statistics.pl
new file mode 100755
index 00000000..3122c534
--- /dev/null
+++ b/tests/tools/compare-statistics.pl
@@ -0,0 +1,49 @@
+#!/usr/bin/perl -w
+use strict;
+
+die "Usage: $0 gold.statistics < test.statistics\n" unless scalar @ARGV == 1;
+
+my $gold_file = shift @ARGV;
+open G, "<$gold_file" or die "Can't read $gold_file: $!";
+my @gold_keys = ();
+my @gold_vals = ();
+while(<G>) {
+ chomp;
+ if (/^([^ ]+)\s*(.*)$/) {
+ push @gold_keys, $1;
+ push @gold_vals, $2;
+ } else {
+ die "Unexpected line in $gold_file: $_\n";
+ }
+}
+
+my $sc = 0;
+my $MATCH = 0;
+my $MISMATCH = 0;
+while(<>) {
+ my $gold_key = $gold_keys[$sc];
+ my $gold_val = $gold_vals[$sc];
+ $sc++;
+ if (/^([^ ]+)\s*(.*)$/) {
+ my $test_key = $1;
+ my $test_val = $2;
+ if ($test_key ne $gold_key) {
+ die "Missing key in output! Expected '$gold_key' but got '$test_key'\n";
+ }
+ if ($gold_val ne 'IGNORE') {
+ if ($gold_val eq $test_val) { $MATCH++; } else {
+ $MISMATCH++;
+ print STDERR "[VALUE FAILURE] key: '$gold_key'\n expected value: '$gold_val'\n actual value: '$test_val'\n";
+ }
+ }
+ } else {
+ die "Unexpected line in test data: $_\n";
+ }
+}
+
+my $TOT = $MISMATCH + $MATCH;
+
+print "$MATCH $TOT\n";
+
+if ($MISMATCH > 0) { exit 1; } else { exit 0; }
+
diff --git a/tests/tools/filter-stderr.pl b/tests/tools/filter-stderr.pl
new file mode 100755
index 00000000..29a97298
--- /dev/null
+++ b/tests/tools/filter-stderr.pl
@@ -0,0 +1,30 @@
+#!/usr/bin/perl -w
+use strict;
+
+my $REAL = '(\+|-)?[1-9][0-9]*(\.[0-9]*)?|(\+|-)?[1-9]\.[0-9]*e(\+|-)?[0-9]+';
+
+while(<>) {
+if (/-LM forest\s+\(nodes\/edges\): (\d+)\/(\d+)/) { print "-lm_nodes $1\n-lm_edges $2\n"; }
+if (/-LM forest\s+\(paths\): (.+)$/) { print "-lm_paths $1\n"; }
+# -LM Viterbi: -12.7893
+if (/-LM\s+Viterbi:\s+($REAL)/) {
+ print "-lm_viterbi $1\n";
+} elsif (/-LM\s+Viterbi:\s+(.+)$/) {
+ # -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries .
+ print "-lm_trans $1\n";
+}
+#Constr. forest (nodes/edges): 111/305
+#Constr. forest (paths): 9899
+if (/Constr\. forest\s+\(nodes\/edges\): (\d+)\/(\d+)/) { print "constr_nodes $1\nconstr_edges $2\n"; }
+if (/Constr\. forest\s+\(paths\): (.+)$/) { print "constr_paths $1\n"; }
+
+if (/\+LM forest\s+\(nodes\/edges\): (\d+)\/(\d+)/) { print "+lm_nodes $1\n+lm_edges $2\n"; }
+if (/\+LM forest\s+\(paths\): (.+)$/) { print "+lm_paths $1\n"; }
+if (/\+LM\s+Viterbi:\s+($REAL)/) {
+ print "+lm_viterbi $1\n";
+} elsif (/\+LM\s+Viterbi:\s+(.+)$/) {
+ # -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries .
+ print "+lm_trans $1\n";
+}
+
+}