diff options
author | Chris Dyer <redpony@gmail.com> | 2009-12-04 20:32:11 -0500 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2009-12-04 20:32:11 -0500 |
commit | 8780a924a060ced3468b71a088409d5ef83fc985 (patch) | |
tree | 0ee244184aa61b39f3bf0cea8922f0d4f97ed364 | |
parent | 5361a6412834485dfd268b6a0fa4144d636f425f (diff) |
oops, forgot to add tests
37 files changed, 1407 insertions, 0 deletions
diff --git a/tests/IPC/Run3.pm b/tests/IPC/Run3.pm new file mode 100644 index 00000000..00875511 --- /dev/null +++ b/tests/IPC/Run3.pm @@ -0,0 +1,850 @@ +package IPC::Run3; +BEGIN { require 5.006_000; } # i.e. 5.6.0 +use strict; + +=head1 NAME + +IPC::Run3 - run a subprocess with input/ouput redirection + +=head1 VERSION + +version 0.043 + +=cut + +our $VERSION = '0.043'; + +=head1 SYNOPSIS + + use IPC::Run3; # Exports run3() by default + + run3 \@cmd, \$in, \$out, \$err; + +=head1 DESCRIPTION + +This module allows you to run a subprocess and redirect stdin, stdout, +and/or stderr to files and perl data structures. It aims to satisfy 99% of the +need for using C<system>, C<qx>, and C<open3> +with a simple, extremely Perlish API. + +Speed, simplicity, and portability are paramount. (That's speed of Perl code; +which is often much slower than the kind of buffered I/O that this module uses +to spool input to and output from the child command.) + +=cut + +use Exporter; +our @ISA = qw(Exporter); +our @EXPORT = qw( run3 ); +our %EXPORT_TAGS = ( all => \@EXPORT ); + +use constant debugging => $ENV{IPCRUN3DEBUG} || $ENV{IPCRUNDEBUG} || 0; +use constant profiling => $ENV{IPCRUN3PROFILE} || $ENV{IPCRUNPROFILE} || 0; +use constant is_win32 => 0 <= index $^O, "Win32"; + +BEGIN { + if ( is_win32 ) { + eval "use Win32 qw( GetOSName ); 1" or die $@; + } +} + +#use constant is_win2k => is_win32 && GetOSName() =~ /Win2000/i; +#use constant is_winXP => is_win32 && GetOSName() =~ /WinXP/i; + +use Carp qw( croak ); +use File::Temp qw( tempfile ); +use POSIX qw( dup dup2 ); + +# We cache the handles of our temp files in order to +# keep from having to incur the (largish) overhead of File::Temp +my %fh_cache; +my $fh_cache_pid = $$; + +my $profiler; + +sub _profiler { $profiler } # test suite access + +BEGIN { + if ( profiling ) { + eval "use Time::HiRes qw( gettimeofday ); 1" or die $@; + if ( $ENV{IPCRUN3PROFILE} =~ /\A\d+\z/ ) { + require IPC::Run3::ProfPP; + IPC::Run3::ProfPP->import; + $profiler = IPC::Run3::ProfPP->new(Level => $ENV{IPCRUN3PROFILE}); + } else { + my ( $dest, undef, $class ) = + reverse split /(=)/, $ENV{IPCRUN3PROFILE}, 2; + $class = "IPC::Run3::ProfLogger" + unless defined $class && length $class; + if ( not eval "require $class" ) { + my $e = $@; + $class = "IPC::Run3::$class"; + eval "require IPC::Run3::$class" or die $e; + } + $profiler = $class->new( Destination => $dest ); + } + $profiler->app_call( [ $0, @ARGV ], scalar gettimeofday() ); + } +} + + +END { + $profiler->app_exit( scalar gettimeofday() ) if profiling; +} + +sub _binmode { + my ( $fh, $mode, $what ) = @_; + # if $mode is not given, then default to ":raw", except on Windows, + # where we default to ":crlf"; + # otherwise if a proper layer string was given, use that, + # else use ":raw" + my $layer = !$mode + ? (is_win32 ? ":crlf" : ":raw") + : ($mode =~ /^:/ ? $mode : ":raw"); + warn "binmode $what, $layer\n" if debugging >= 2; + + binmode $fh, ":raw" unless $layer eq ":raw"; # remove all layers first + binmode $fh, $layer or croak "binmode $layer failed: $!"; +} + +sub _spool_data_to_child { + my ( $type, $source, $binmode_it ) = @_; + + # If undef (not \undef) passed, they want the child to inherit + # the parent's STDIN. + return undef unless defined $source; + + my $fh; + if ( ! $type ) { + open $fh, "<", $source or croak "$!: $source"; + _binmode($fh, $binmode_it, "STDIN"); + warn "run3(): feeding file '$source' to child STDIN\n" + if debugging >= 2; + } elsif ( $type eq "FH" ) { + $fh = $source; + warn "run3(): feeding filehandle '$source' to child STDIN\n" + if debugging >= 2; + } else { + $fh = $fh_cache{in} ||= tempfile; + truncate $fh, 0; + seek $fh, 0, 0; + _binmode($fh, $binmode_it, "STDIN"); + my $seekit; + if ( $type eq "SCALAR" ) { + + # When the run3()'s caller asks to feed an empty file + # to the child's stdin, we want to pass a live file + # descriptor to an empty file (like /dev/null) so that + # they don't get surprised by invalid fd errors and get + # normal EOF behaviors. + return $fh unless defined $$source; # \undef passed + + warn "run3(): feeding SCALAR to child STDIN", + debugging >= 3 + ? ( ": '", $$source, "' (", length $$source, " chars)" ) + : (), + "\n" + if debugging >= 2; + + $seekit = length $$source; + print $fh $$source or die "$! writing to temp file"; + + } elsif ( $type eq "ARRAY" ) { + warn "run3(): feeding ARRAY to child STDIN", + debugging >= 3 ? ( ": '", @$source, "'" ) : (), + "\n" + if debugging >= 2; + + print $fh @$source or die "$! writing to temp file"; + $seekit = grep length, @$source; + } elsif ( $type eq "CODE" ) { + warn "run3(): feeding output of CODE ref '$source' to child STDIN\n" + if debugging >= 2; + my $parms = []; # TODO: get these from $options + while (1) { + my $data = $source->( @$parms ); + last unless defined $data; + print $fh $data or die "$! writing to temp file"; + $seekit = length $data; + } + } + + seek $fh, 0, 0 or croak "$! seeking on temp file for child's stdin" + if $seekit; + } + + croak "run3() can't redirect $type to child stdin" + unless defined $fh; + + return $fh; +} + +sub _fh_for_child_output { + my ( $what, $type, $dest, $options ) = @_; + + my $fh; + if ( $type eq "SCALAR" && $dest == \undef ) { + warn "run3(): redirecting child $what to oblivion\n" + if debugging >= 2; + + $fh = $fh_cache{nul} ||= do { + open $fh, ">", File::Spec->devnull; + $fh; + }; + } elsif ( $type eq "FH" ) { + $fh = $dest; + warn "run3(): redirecting $what to filehandle '$dest'\n" + if debugging >= 3; + } elsif ( !$type ) { + warn "run3(): feeding child $what to file '$dest'\n" + if debugging >= 2; + + open $fh, $options->{"append_$what"} ? ">>" : ">", $dest + or croak "$!: $dest"; + } else { + warn "run3(): capturing child $what\n" + if debugging >= 2; + + $fh = $fh_cache{$what} ||= tempfile; + seek $fh, 0, 0; + truncate $fh, 0; + } + + my $binmode_it = $options->{"binmode_$what"}; + _binmode($fh, $binmode_it, uc $what); + + return $fh; +} + +sub _read_child_output_fh { + my ( $what, $type, $dest, $fh, $options ) = @_; + + return if $type eq "SCALAR" && $dest == \undef; + + seek $fh, 0, 0 or croak "$! seeking on temp file for child $what"; + + if ( $type eq "SCALAR" ) { + warn "run3(): reading child $what to SCALAR\n" + if debugging >= 3; + + # two read()s are used instead of 1 so that the first will be + # logged even it reads 0 bytes; the second won't. + my $count = read $fh, $$dest, 10_000, + $options->{"append_$what"} ? length $$dest : 0; + while (1) { + croak "$! reading child $what from temp file" + unless defined $count; + + last unless $count; + + warn "run3(): read $count bytes from child $what", + debugging >= 3 ? ( ": '", substr( $$dest, -$count ), "'" ) : (), + "\n" + if debugging >= 2; + + $count = read $fh, $$dest, 10_000, length $$dest; + } + } elsif ( $type eq "ARRAY" ) { + if ($options->{"append_$what"}) { + push @$dest, <$fh>; + } else { + @$dest = <$fh>; + } + if ( debugging >= 2 ) { + my $count = 0; + $count += length for @$dest; + warn + "run3(): read ", + scalar @$dest, + " records, $count bytes from child $what", + debugging >= 3 ? ( ": '", @$dest, "'" ) : (), + "\n"; + } + } elsif ( $type eq "CODE" ) { + warn "run3(): capturing child $what to CODE ref\n" + if debugging >= 3; + + local $_; + while ( <$fh> ) { + warn + "run3(): read ", + length, + " bytes from child $what", + debugging >= 3 ? ( ": '", $_, "'" ) : (), + "\n" + if debugging >= 2; + + $dest->( $_ ); + } + } else { + croak "run3() can't redirect child $what to a $type"; + } + +} + +sub _type { + my ( $redir ) = @_; + return "FH" if eval { $redir->isa("IO::Handle") }; + my $type = ref $redir; + return $type eq "GLOB" ? "FH" : $type; +} + +sub _max_fd { + my $fd = dup(0); + POSIX::close $fd; + return $fd; +} + +my $run_call_time; +my $sys_call_time; +my $sys_exit_time; + +sub run3 { + $run_call_time = gettimeofday() if profiling; + + my $options = @_ && ref $_[-1] eq "HASH" ? pop : {}; + + my ( $cmd, $stdin, $stdout, $stderr ) = @_; + + print STDERR "run3(): running ", + join( " ", map "'$_'", ref $cmd ? @$cmd : $cmd ), + "\n" + if debugging; + + if ( ref $cmd ) { + croak "run3(): empty command" unless @$cmd; + croak "run3(): undefined command" unless defined $cmd->[0]; + croak "run3(): command name ('')" unless length $cmd->[0]; + } else { + croak "run3(): missing command" unless @_; + croak "run3(): undefined command" unless defined $cmd; + croak "run3(): command ('')" unless length $cmd; + } + + foreach (qw/binmode_stdin binmode_stdout binmode_stderr/) { + if (my $mode = $options->{$_}) { + croak qq[option $_ must be a number or a proper layer string: "$mode"] + unless $mode =~ /^(:|\d+$)/; + } + } + + my $in_type = _type $stdin; + my $out_type = _type $stdout; + my $err_type = _type $stderr; + + if ($fh_cache_pid != $$) { + # fork detected, close all cached filehandles and clear the cache + close $_ foreach values %fh_cache; + %fh_cache = (); + $fh_cache_pid = $$; + } + + # This routine procedes in stages so that a failure in an early + # stage prevents later stages from running, and thus from needing + # cleanup. + + my $in_fh = _spool_data_to_child $in_type, $stdin, + $options->{binmode_stdin} if defined $stdin; + + my $out_fh = _fh_for_child_output "stdout", $out_type, $stdout, + $options if defined $stdout; + + my $tie_err_to_out = + defined $stderr && defined $stdout && $stderr eq $stdout; + + my $err_fh = $tie_err_to_out + ? $out_fh + : _fh_for_child_output "stderr", $err_type, $stderr, + $options if defined $stderr; + + # this should make perl close these on exceptions +# local *STDIN_SAVE; + local *STDOUT_SAVE; + local *STDERR_SAVE; + + my $saved_fd0 = dup( 0 ) if defined $in_fh; + +# open STDIN_SAVE, "<&STDIN"# or croak "run3(): $! saving STDIN" +# if defined $in_fh; + open STDOUT_SAVE, ">&STDOUT" or croak "run3(): $! saving STDOUT" + if defined $out_fh; + open STDERR_SAVE, ">&STDERR" or croak "run3(): $! saving STDERR" + if defined $err_fh; + + my $errno; + my $ok = eval { + # The open() call here seems to not force fd 0 in some cases; + # I ran in to trouble when using this in VCP, not sure why. + # the dup2() seems to work. + dup2( fileno $in_fh, 0 ) +# open STDIN, "<&=" . fileno $in_fh + or croak "run3(): $! redirecting STDIN" + if defined $in_fh; + +# close $in_fh or croak "$! closing STDIN temp file" +# if ref $stdin; + + open STDOUT, ">&" . fileno $out_fh + or croak "run3(): $! redirecting STDOUT" + if defined $out_fh; + + open STDERR, ">&" . fileno $err_fh + or croak "run3(): $! redirecting STDERR" + if defined $err_fh; + + $sys_call_time = gettimeofday() if profiling; + + my $r = ref $cmd + ? system { $cmd->[0] } + is_win32 + ? map { + # Probably need to offer a win32 escaping + # option, every command may be different. + ( my $s = $_ ) =~ s/"/"""/g; + $s = qq{"$s"}; + $s; + } @$cmd + : @$cmd + : system $cmd; + + $errno = $!; # save $!, because later failures will overwrite it + $sys_exit_time = gettimeofday() if profiling; + if ( debugging ) { + my $err_fh = defined $err_fh ? \*STDERR_SAVE : \*STDERR; + if ( defined $r && $r != -1 ) { + print $err_fh "run3(): \$? is $?\n"; + } else { + print $err_fh "run3(): \$? is $?, \$! is $errno\n"; + } + } + + die $! if defined $r && $r == -1 && !$options->{return_if_system_error}; + + 1; + }; + my $x = $@; + + my @errs; + + if ( defined $saved_fd0 ) { + dup2( $saved_fd0, 0 ); + POSIX::close( $saved_fd0 ); + } + +# open STDIN, "<&STDIN_SAVE"# or push @errs, "run3(): $! restoring STDIN" +# if defined $in_fh; + open STDOUT, ">&STDOUT_SAVE" or push @errs, "run3(): $! restoring STDOUT" + if defined $out_fh; + open STDERR, ">&STDERR_SAVE" or push @errs, "run3(): $! restoring STDERR" + if defined $err_fh; + + croak join ", ", @errs if @errs; + + die $x unless $ok; + + _read_child_output_fh "stdout", $out_type, $stdout, $out_fh, $options + if defined $out_fh && $out_type && $out_type ne "FH"; + _read_child_output_fh "stderr", $err_type, $stderr, $err_fh, $options + if defined $err_fh && $err_type && $err_type ne "FH" && !$tie_err_to_out; + $profiler->run_exit( + $cmd, + $run_call_time, + $sys_call_time, + $sys_exit_time, + scalar gettimeofday() + ) if profiling; + + $! = $errno; # restore $! from system() + + return 1; +} + +1; + +__END__ + +=head2 C<< run3($cmd, $stdin, $stdout, $stderr, \%options) >> + +All parameters after C<$cmd> are optional. + +The parameters C<$stdin>, C<$stdout> and C<$stderr> indicate +how the child's corresponding filehandle +(C<STDIN>, C<STDOUT> and C<STDERR>, resp.) will be redirected. +Because the redirects come last, this allows C<STDOUT> and C<STDERR> to default +to the parent's by just not specifying them -- a common use case. + +C<run3> throws an exception if the wrapped C<system> call returned -1 +or anything went wrong with C<run3>'s processing of filehandles. +Otherwise it returns true. +It leaves C<$?> intact for inspection of exit and wait status. + +Note that a true return value from C<run3> doesn't mean that the command +had a successful exit code. Hence you should always check C<$?>. + +See L</%options> for an option to handle the case of C<system> +returning -1 yourself. + +=head3 C<$cmd> + +Usually C<$cmd> will be an ARRAY reference and the child is invoked via + + system @$cmd; + +But C<$cmd> may also be a string in which case the child is invoked via + + system $cmd; + +(cf. L<perlfunc/system> for the difference and the pitfalls of using +the latter form). + +=head3 C<$stdin>, C<$stdout>, C<$stderr> + +The parameters C<$stdin>, C<$stdout> and C<$stderr> +can take one of the following forms: + +=over 4 + +=item C<undef> (or not specified at all) + +The child inherits the corresponding filehandle from the parent. + + run3 \@cmd, $stdin; # child writes to same STDOUT and STDERR as parent + run3 \@cmd, undef, $stdout, $stderr; # child reads from same STDIN as parent + +=item C<\undef> + +The child's filehandle is redirected from or to the +local equivalent of C</dev/null> (as returned by +C<< File::Spec->devnull() >>). + + run3 \@cmd, \undef, $stdout, $stderr; # child reads from /dev/null + +=item a simple scalar + +The parameter is taken to be the name of a file to read from +or write to. In the latter case, the file will be opened via + + open FH, ">", ... + +i.e. it is created if it doesn't exist and truncated otherwise. +Note that the file is opened by the parent which will L<croak|Carp/croak> +in case of failure. + + run3 \@cmd, \undef, "out.txt"; # child writes to file "out.txt" + +=item a filehandle (either a reference to a GLOB or an C<IO::Handle>) + +The filehandle is inherited by the child. + + open my $fh, ">", "out.txt"; + print $fh "prologue\n"; + ... + run3 \@cmd, \undef, $fh; # child writes to $fh + ... + print $fh "epilogue\n"; + close $fh; + +=item a SCALAR reference + +The referenced scalar is treated as a string to be read from or +written to. In the latter case, the previous content of the string +is overwritten. + + my $out; + run3 \@cmd, \undef, \$out; # child writes into string + run3 \@cmd, \<<EOF; # child reads from string (can use "here" notation) + Input + to + child + EOF + +=item an ARRAY reference + +For C<$stdin>, the elements of C<@$stdin> are simply spooled to the child. + +For C<$stdout> or C<$stderr>, the child's corresponding file descriptor +is read line by line (as determined by the current setting of C<$/>) +into C<@$stdout> or C<@$stderr>, resp. The previous content of the array +is overwritten. + + my @lines; + run3 \@cmd, \undef, \@lines; # child writes into array + +=item a CODE reference + +For C<$stdin>, C<&$stdin> will be called repeatedly (with no arguments) and +the return values are spooled to the child. C<&$stdin> must signal the end of +input by returning C<undef>. + +For C<$stdout> or C<$stderr>, the child's corresponding file descriptor +is read line by line (as determined by the current setting of C<$/>) +and C<&$stdout> or C<&$stderr>, resp., is called with the contents of the line. +Note that there's no end-of-file indication. + + my $i = 0; + sub producer { + return $i < 10 ? "line".$i++."\n" : undef; + } + + run3 \@cmd, \&producer; # child reads 10 lines + +Note that this form of redirecting the child's I/O doesn't imply +any form of concurrency between parent and child - run3()'s method of +operation is the same no matter which form of redirection you specify. + +=back + +If the same value is passed for C<$stdout> and C<$stderr>, then the child +will write both C<STDOUT> and C<STDERR> to the same filehandle. +In general, this means that + + run3 \@cmd, \undef, "foo.txt", "foo.txt"; + run3 \@cmd, \undef, \$both, \$both; + +will DWIM and pass a single file handle to the child for both C<STDOUT> and +C<STDERR>, collecting all into file "foo.txt" or C<$both>. + +=head3 C<\%options> + +The last parameter, C<\%options>, must be a hash reference if present. + +Currently the following +keys are supported: + +=over 4 + +=item C<binmode_stdin>, C<binmode_stdout>, C<binmode_stderr> + +The value must a "layer" as described in L<perlfunc/binmode>. +If specified the corresponding +parameter C<$stdin>, C<$stdout> or C<$stderr>, resp., operates +with the given layer. + +For backward compatibility, a true value that doesn't start with ":" +(e.g. a number) is interpreted as ":raw". If the value is false +or not specified, the default is ":crlf" on Windows and ":raw" otherwise. + +Don't expect that values other than the built-in layers ":raw", ":crlf", +and (on newer Perls) ":bytes", ":utf8", ":encoding(...)" will work. + +=item C<append_stdout>, C<append_stderr> + +If their value is true then the corresponding +parameter C<$stdout> or C<$stderr>, resp., will append the child's output +to the existing "contents" of the redirector. This only makes +sense if the redirector is a simple scalar (the corresponding file +is opened in append mode), a SCALAR reference (the output is +appended to the previous contents of the string) +or an ARRAY reference (the output is C<push>ed onto the +previous contents of the array). + +=item C<return_if_system_error> + +If this is true C<run3> does B<not> throw an exception if C<system> +returns -1 (cf. L<perlfunc/system> for possible +failure scenarios.), but returns true instead. +In this case C<$?> has the value -1 and C<$!> +contains the errno of the failing C<system> call. + +=back + +=head1 HOW IT WORKS + +=over 4 + +=item (1) + +For each redirector C<$stdin>, C<$stdout>, and C<$stderr>, +C<run3()> furnishes a filehandle: + +=over 4 + +=item * + +if the redirector already specifies a filehandle it just uses that + +=item * + +if the redirector specifies a filename, C<run3()> opens the file +in the appropriate mode + +=item * + +in all other cases, C<run3()> opens a temporary file +(using L<tempfile|Temp/tempfile>) + +=back + +=item (2) + +If C<run3()> opened a temporary file for C<$stdin> in step (1), +it writes the data using the specified method (either +from a string, an array or returnd by a function) to the temporary file and rewinds it. + +=item (3) + +C<run3()> saves the parent's C<STDIN>, C<STDOUT> and C<STDERR> by duplicating +them to new filehandles. It duplicates the filehandles from step (1) +to C<STDIN>, C<STDOUT> and C<STDERR>, resp. + +=item (4) + +C<run3()> runs the child by invoking L<system|perlfunc/system> +with C<$cmd> as specified above. + +=item (5) + +C<run3()> restores the parent's C<STDIN>, C<STDOUT> and C<STDERR> saved in step (3). + +=item (6) + +If C<run3()> opened a temporary file for C<$stdout> or C<$stderr> in step (1), +it rewinds it and reads back its contents using the specified method +(either to a string, an array or by calling a function). + +=item (7) + +C<run3()> closes all filehandles that it opened explicitly in step (1). + +=back + +Note that when using temporary files, C<run3()> tries to amortize the overhead +by reusing them (i.e. it keeps them open and rewinds and truncates them +before the next operation). + +=head1 LIMITATIONS + +Often uses intermediate files (determined by File::Temp, and thus by the +File::Spec defaults and the TMPDIR env. variable) for speed, portability and +simplicity. + +Use extrem caution when using C<run3> in a threaded environment if +concurrent calls of C<run3> are possible. Most likely, I/O from different +invocations will get mixed up. The reason is that in most thread +implementations all threads in a process share the same STDIN/STDOUT/STDERR. +Known failures are Perl ithreads on Linux and Win32. Note that C<fork> +on Win32 is emulated via Win32 threads and hence I/O mix up is possible +between forked children here (C<run3> is "fork safe" on Unix, though). + +=head1 DEBUGGING + +To enable debugging use the IPCRUN3DEBUG environment variable to +a non-zero integer value: + + $ IPCRUN3DEBUG=1 myapp + +=head1 PROFILING + +To enable profiling, set IPCRUN3PROFILE to a number to enable emitting profile +information to STDERR (1 to get timestamps, 2 to get a summary report at the +END of the program, 3 to get mini reports after each run) or to a filename to +emit raw data to a file for later analysis. + +=head1 COMPARISON + +Here's how it stacks up to existing APIs: + +=head2 compared to C<system()>, C<qx''>, C<open "...|">, C<open "|..."> + +=over + +=item + + +redirects more than one file descriptor + +=item + + +returns TRUE on success, FALSE on failure + +=item + + +throws an error if problems occur in the parent process (or the pre-exec child) + +=item + + +allows a very perlish interface to Perl data structures and subroutines + +=item + + +allows 1 word invocations to avoid the shell easily: + + run3 ["foo"]; # does not invoke shell + +=item - + +does not return the exit code, leaves it in $? + +=back + +=head2 compared to C<open2()>, C<open3()> + +=over + +=item + + +no lengthy, error prone polling/select loop needed + +=item + + +hides OS dependancies + +=item + + +allows SCALAR, ARRAY, and CODE references to source and sink I/O + +=item + + +I/O parameter order is like C<open3()> (not like C<open2()>). + +=item - + +does not allow interaction with the subprocess + +=back + +=head2 compared to L<IPC::Run::run()|IPC::Run/run> + +=over + +=item + + +smaller, lower overhead, simpler, more portable + +=item + + +no select() loop portability issues + +=item + + +does not fall prey to Perl closure leaks + +=item - + +does not allow interaction with the subprocess (which +IPC::Run::run() allows by redirecting subroutines) + +=item - + +lacks many features of C<IPC::Run::run()> (filters, pipes, +redirects, pty support) + +=back + +=head1 COPYRIGHT + +Copyright 2003, R. Barrie Slaymaker, Jr., All Rights Reserved + +=head1 LICENSE + +You may use this module under the terms of the BSD, Artistic, or GPL licenses, +any version. + +=head1 AUTHOR + +Barrie Slaymaker E<lt>C<barries@slaysys.com>E<gt> + +Ricardo SIGNES E<lt>C<rjbs@cpan.org>E<gt> performed some routine maintenance in +2005, thanks to help from the following ticket and/or patch submitters: Jody +Belka, Roderich Schupp, David Morel, and anonymous others. + +=cut diff --git a/tests/README b/tests/README new file mode 100644 index 00000000..d6b7cf51 --- /dev/null +++ b/tests/README @@ -0,0 +1,11 @@ +IPC::Run3 code is included, which is copyright 2003 by R. Barrie Slaymaker and +makes the disclaimer: + +You may use this module under the terms of the BSD, Artistic, or GPL licenses, +any version. + +For more information, refer to: + http://search.cpan.org/~rjbs/IPC-Run3-0.043/lib/IPC/Run3.pm + +The rest of the code and data is subject to the cdec BSD license. + diff --git a/tests/run-system-tests.pl b/tests/run-system-tests.pl new file mode 100755 index 00000000..738000dc --- /dev/null +++ b/tests/run-system-tests.pl @@ -0,0 +1,121 @@ +#!/usr/bin/perl -w +use strict; +my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; } + +use IPC::Run3; +use File::Temp qw ( tempdir ); +my $TEMP_DIR = tempdir( CLEANUP => 1 ); + +#my $cwd = cwd(); +#die "Sanity failed: $cwd" unless -d $cwd; +my $DECODER = "$script_dir/../src/cdec"; +my $FILTER = "$script_dir/tools/filter-stderr.pl"; +my $COMPARE_STATS = "$script_dir/tools/compare-statistics.pl"; + +die "Can't find $DECODER" unless -f $DECODER; +die "Can't execute $DECODER" unless -x $DECODER; +die "Can't find $FILTER" unless -f $FILTER; +die "Can't execute $FILTER" unless -x $FILTER; +die "Can't find $COMPARE_STATS" unless -f $COMPARE_STATS; +die "Can't execute $COMPARE_STATS" unless -x $COMPARE_STATS; + +my $TEST_DIR = "$script_dir/system_tests"; +opendir DIR, $TEST_DIR or die "Can't open $TEST_DIR: $!"; +#my @test_dirs = grep { /^\./ && −d "$some_dir/$_" } readdir(DIR); +my @tests = grep { !/^\./ && -d "$TEST_DIR/$_" } readdir(DIR); +closedir DIR; + +print STDERR " DECODER: $DECODER\n"; +print STDERR " TESTS: @tests\n"; +print STDERR "TEMP DIR: $TEMP_DIR\n"; + +my $FAIL = 0; +my $PASS = 0; +for my $test (@tests) { + print "TEST: $test\n"; + chdir "$TEST_DIR/$test" or die "Can't chdir to $TEST_DIR/$test: $!"; + my $CMD = "$DECODER"; + unless (-f 'gold.statistics') { + print " missing gold.statistics -- SKIPPING\n"; + $FAIL++; + next; + } + unless (-f 'gold.stdout') { + print " missing gold.stdout -- SKIPPING\n"; + $FAIL++; + next; + } + if (-f 'cdec.ini') { + $CMD .= ' -c cdec.ini'; + } + if (-f 'weights') { + $CMD .= ' -w weights'; + } + if (-f 'input.txt') { + $CMD .= ' -i input.txt'; + } + + run3 $CMD, \undef, "$TEMP_DIR/stdout", "$TEMP_DIR/stderr"; + if ($? != 0) { + print STDERR " non-zero exit! command: $CMD\n"; + $FAIL++; + } else { + die unless -f "$TEMP_DIR/stdout"; + my $failed = 0; + run3 "diff gold.stdout $TEMP_DIR/stdout"; + if ($? != 0) { + print STDERR " FAILED differences in output!\n"; + $failed = 1; + } + die unless -f "$TEMP_DIR/stderr"; + run3 "$FILTER", "$TEMP_DIR/stderr", "$TEMP_DIR/test.statistics"; + if ($? != 0) { + print STDERR " non-zero exit: $FILTER\n"; + $FAIL++; + next; + } + my @lines; + run3 "$COMPARE_STATS gold.statistics", "$TEMP_DIR/test.statistics", \@lines; + if (scalar @lines != 1) { + print STDERR " unexpected output: @lines\n"; + $FAIL++; + next; + } + my $l = $lines[0]; chomp $l; + if ($l =~ /^(\d+) (\d+)$/) { + my $passes = $1; + my $total = $2; + my $pct = $passes * 100 / $total; + $pct = sprintf "%.2f", $pct; + + if ($total == $passes) { + if ($failed) { + print " (decoder statistics match, though)\n"; + } else { + print " PASSED\n"; + } + } else { + if ($failed) { + print " ($pct of decoder search statistics match)\n"; + } else { + print " FAILED $pct of decoder search statistics match\n"; + } + } + } else { + $failed = 1; + print STDERR " bad format: $l\n"; + } + if ($failed) { $FAIL++; } else { $PASS++; } + } +} + +my $TOT = $PASS + $FAIL; +print "\nSUMMARY: $PASS / $TOT TESTS PASSED\n"; +if ($FAIL != 0) { + print " !!! THERE WERE FAILURES - DECODER IS ACTING SUSPICIOUSLY !!!\n\n"; + exit 1; +} else { + print "\n"; + exit 0; +} + diff --git a/tests/system_tests/australia-align/australia.scfg.gz b/tests/system_tests/australia-align/australia.scfg.gz Binary files differnew file mode 100644 index 00000000..0cd70f1f --- /dev/null +++ b/tests/system_tests/australia-align/australia.scfg.gz diff --git a/tests/system_tests/australia-align/cdec.ini b/tests/system_tests/australia-align/cdec.ini new file mode 100644 index 00000000..eba98941 --- /dev/null +++ b/tests/system_tests/australia-align/cdec.ini @@ -0,0 +1,3 @@ +formalism=scfg +grammar=australia.scfg.gz +aligner=true diff --git a/tests/system_tests/australia-align/gold.statistics b/tests/system_tests/australia-align/gold.statistics new file mode 100644 index 00000000..8101bd40 --- /dev/null +++ b/tests/system_tests/australia-align/gold.statistics @@ -0,0 +1,8 @@ +-lm_nodes 77 +-lm_edges 244232 +-lm_paths 3.79555e+28 +-lm_trans australia is have diplomatic relations with north korea one of the few countries . +-lm_viterbi -12.7893 +constr_nodes 111 +constr_edges 305 +constr_paths 9899 diff --git a/tests/system_tests/australia-align/gold.stdout b/tests/system_tests/australia-align/gold.stdout new file mode 100644 index 00000000..310ac348 --- /dev/null +++ b/tests/system_tests/australia-align/gold.stdout @@ -0,0 +1 @@ +0-0 1-1 2-11 3-12 3-13 4-7 4-8 5-9 5-10 6-4 7-5 8-6 9-2 9-3 10-14 diff --git a/tests/system_tests/australia-align/input.txt b/tests/system_tests/australia-align/input.txt new file mode 100644 index 00000000..482d3e4b --- /dev/null +++ b/tests/system_tests/australia-align/input.txt @@ -0,0 +1 @@ +澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。 ||| australia is one of the few countries that has diplomatic relations with north korea . diff --git a/tests/system_tests/australia-align/stderr b/tests/system_tests/australia-align/stderr new file mode 100644 index 00000000..c49e3bd0 --- /dev/null +++ b/tests/system_tests/australia-align/stderr @@ -0,0 +1,48 @@ +cdec v1.0 (c) 2009 by Chris Dyer +Configuration file: cdec.ini +Reading SCFG grammar from australia.scfg.gz + 33737 rules read. +Reading weights from weights +Loaded 7 feature weights +Reading input from input.txt + +INPUT: 澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。 ||| australia is one of the few ... + id = 0 + Goal category: [S] + ........... + -LM forest (nodes/edges): 77/244232 + -LM forest (paths): 3.79555e+28 + -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries . + -LM Viterbi: -12.7893 + Goal category: [CAT_76] + ............... + Constr. forest (nodes/edges): 111/305 + Constr. forest (paths): 9899 + Constr. VitTree: (CAT_76 (CAT_75 (CAT_73 (CAT_13 (CAT_12 australia is)) (CAT_66 (CAT_55 (CAT_40 one of the few countries) (CAT_5 that has) diplomatic relations) with north korea)) (CAT_11 .))) + 1 0.367526 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0.367526 0.999979 0.100222 0.0997905 0.0210297 0 0 0.00481735 8.02707e-12 0 0 0 0 0 0.0587194 + 0 9.42288e-12 0 8.53331e-07 3.77376e-06 0 0 8.02707e-12 0.0310404 0 0 0.999994 0.947965 0.947965 9.71274e-07 + 0 0 0 0 0 0 0 0 0 0 0 0.947965 1 1 0 + 0 0 7.54036e-06 6.96573e-05 0 0 0 0.742293 0.999244 0 0 0.0310422 0 0 0.000677908 + 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 + 0 0.02397 0.328274 0.330722 0.961649 0.321117 0.263254 0.257688 0.000755969 0 0 3.82289e-06 0 0 0.0265549 + 0 0 0.130232 0.130232 0.321285 1 0.565141 0 0 0 0 0 0 0 0 + 0 0 0.108129 0.10919 0.260861 0.565141 1 0 0 0 0 0 0 0 2.53515e-12 + 0 0.100047 0.999815 0.996401 0.365844 0.130232 0.108128 0 0 0 0 0 0 0 0.112835 + 0 0.058724 0.112814 0.112464 0.0214041 0 0 0.00590826 0 0 0 0 0 0 1 + + 012345678901234 +0*..............0 +1.*.............1 +2...........*...2 +3............**.3 +4.......**......4 +5.........**....5 +6....*..........6 +7.....*.........7 +8......*........8 +9..**...........9 +0..............*0 + 012345678901234 + +Translation: 1.41218 secs (1 calls) diff --git a/tests/system_tests/australia-align/weights b/tests/system_tests/australia-align/weights new file mode 100644 index 00000000..a280184c --- /dev/null +++ b/tests/system_tests/australia-align/weights @@ -0,0 +1,7 @@ +WordPenalty -2.844814 +LanguageModel 1.0 +PhraseModel_0 -1.066893 +PhraseModel_1 -0.752247 +PhraseModel_2 -0.589793 +PassThrough -20.0 +Glue 0 diff --git a/tests/system_tests/australia/australia.scfg.gz b/tests/system_tests/australia/australia.scfg.gz Binary files differnew file mode 100644 index 00000000..0cd70f1f --- /dev/null +++ b/tests/system_tests/australia/australia.scfg.gz diff --git a/tests/system_tests/australia/cdec.ini b/tests/system_tests/australia/cdec.ini new file mode 100644 index 00000000..dfc3cb20 --- /dev/null +++ b/tests/system_tests/australia/cdec.ini @@ -0,0 +1,2 @@ +formalism=scfg +grammar=australia.scfg.gz diff --git a/tests/system_tests/australia/gold.statistics b/tests/system_tests/australia/gold.statistics new file mode 100644 index 00000000..050d12dd --- /dev/null +++ b/tests/system_tests/australia/gold.statistics @@ -0,0 +1,8 @@ +-lm_nodes 77 +-lm_edges 244232 +-lm_paths 3.79555e+28 +-lm_trans australia is have diplomatic relations with north korea one of the few countries . +-lm_viterbi -12.7893 +constr_nodes 185 +constr_edges 827 +constr_paths 279358 diff --git a/tests/system_tests/australia/gold.stdout b/tests/system_tests/australia/gold.stdout new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/tests/system_tests/australia/gold.stdout diff --git a/tests/system_tests/australia/input.txt b/tests/system_tests/australia/input.txt new file mode 100644 index 00000000..f14bf535 --- /dev/null +++ b/tests/system_tests/australia/input.txt @@ -0,0 +1 @@ +澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。 ||| ((('australia',0,1),),(('is',0,1),),(('one',0,1),),(('of',0,1),),(('the',0,4),('a',0,4),('a',0,1),('the',0,1),),(('small',0,1),('tiny',0,1),('miniscule',0,1),('handful',0,2),),(('number',0,1),('group',0,1),),(('of',0,2),),(('few',0,1),),(('countries',0,1),),(('that',0,1),),(('has',0,1),('have',0,1),),(('diplomatic',0,1),),(('relations',0,1),),(('with',0,1),),(('north',0,1),),(('korea',0,1),),(('.',0,1),),) diff --git a/tests/system_tests/australia/stderr b/tests/system_tests/australia/stderr new file mode 100644 index 00000000..c41fb9fb --- /dev/null +++ b/tests/system_tests/australia/stderr @@ -0,0 +1,22 @@ +cdec v1.0 (c) 2009 by Chris Dyer +Configuration file: cdec.ini +Reading SCFG grammar from australia.scfg.gz + 33737 rules read. +Reading weights from weights +Loaded 7 feature weights +Reading input from input.txt + +INPUT: 澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。 ||| ... + id = 0 + Goal category: [S] + ........... + -LM forest (nodes/edges): 77/244232 + -LM forest (paths): 3.79555e+28 + -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries . + -LM Viterbi: -12.7893 + Goal category: [CAT_76] + .................. + Constr. forest (nodes/edges): 185/827 + Constr. forest (paths): 279358 + Constr. VitTree: (CAT_76 (CAT_75 (CAT_1 (CAT_0 australia)) (CAT_74 is (CAT_31 one of (CAT_20 a few countries)) that (CAT_36 have diplomatic relations (CAT_15 with north korea)) .))) +Translation: 2.69099 secs (1 calls) diff --git a/tests/system_tests/australia/weights b/tests/system_tests/australia/weights new file mode 100644 index 00000000..a280184c --- /dev/null +++ b/tests/system_tests/australia/weights @@ -0,0 +1,7 @@ +WordPenalty -2.844814 +LanguageModel 1.0 +PhraseModel_0 -1.066893 +PhraseModel_1 -0.752247 +PhraseModel_2 -0.589793 +PassThrough -20.0 +Glue 0 diff --git a/tests/system_tests/controlled_synparse/cdec.ini b/tests/system_tests/controlled_synparse/cdec.ini new file mode 100644 index 00000000..b5c7d4d8 --- /dev/null +++ b/tests/system_tests/controlled_synparse/cdec.ini @@ -0,0 +1,3 @@ +formalism=scfg +grammar=scfg.biparse.gz +k_best=100 diff --git a/tests/system_tests/controlled_synparse/gold.statistics b/tests/system_tests/controlled_synparse/gold.statistics new file mode 100644 index 00000000..3885cf94 --- /dev/null +++ b/tests/system_tests/controlled_synparse/gold.statistics @@ -0,0 +1,16 @@ +-lm_nodes 11 +-lm_edges 18 +-lm_paths 18 +-lm_trans a d c b +-lm_trans 0 +constr_nodes 8 +constr_edges 8 +constr_paths 1 +-lm_nodes 11 +-lm_edges 18 +-lm_paths 18 +-lm_trans a d c b +-lm_trans 0 +constr_nodes 12 +constr_edges 14 +constr_paths 3 diff --git a/tests/system_tests/controlled_synparse/gold.stdout b/tests/system_tests/controlled_synparse/gold.stdout new file mode 100644 index 00000000..f3e0ce08 --- /dev/null +++ b/tests/system_tests/controlled_synparse/gold.stdout @@ -0,0 +1,36 @@ +0 ||| c b c d ||| F1bad2=1;F5=1;F6=1;F7=1;Glue=3 +0 ||| c d c b e ||| F1bad2=1;F4=1;F5=1;F7=1;Glue=1 +0 ||| c b c d ||| F1bad2=1;F6=1;F7=1;F9=1;Glue=2 +0 ||| b d c b e ||| F1bad1=1;F4=1;F5=1;F7=1;Glue=1 +0 ||| c b c d ||| F1bad2=1;F3=1;F5=1;F7=1;Glue=1 +0 ||| a d c b e ||| F1=1;F4=1;F5=1;F7=1;Glue=1 +0 ||| b b c d ||| F1bad1=1;F3=1;F5=1;F7=1;Glue=1 +0 ||| c d c b ||| F1bad2=1;F2=1;F5=1;F7=1;Glue=1 +0 ||| a b c d ||| F1=1;F3=1;F5=1;F7=1;Glue=1 +0 ||| b b c d ||| F1bad1=1;F5=1;F6=1;F7=1;Glue=3 +0 ||| b d c b ||| F1bad1=1;F2=1;F5=1;F7=1;Glue=1 +0 ||| a d c b ||| F1=1;F2=1;F5=1;F7=1;Glue=1 +0 ||| b b c d ||| F1bad1=1;F6=1;F7=1;F9=1;Glue=2 +0 ||| c c b d ||| F1bad2=1;F6=1;F7=1;F8=1;Glue=2 +0 ||| a b c d ||| F1=1;F6=1;F7=1;F9=1;Glue=2 +0 ||| a b c d ||| F1=1;F5=1;F6=1;F7=1;Glue=3 +0 ||| b c b d ||| F1bad1=1;F6=1;F7=1;F8=1;Glue=2 +0 ||| a c b d ||| F1=1;F6=1;F7=1;F8=1;Glue=2 +1 ||| c b c d ||| F1bad2=1;F5=1;F6=1;F7=1;Glue=3 +1 ||| c d c b e ||| F1bad2=1;F4=1;F5=1;F7=1;Glue=1 +1 ||| c b c d ||| F1bad2=1;F6=1;F7=1;F9=1;Glue=2 +1 ||| b d c b e ||| F1bad1=1;F4=1;F5=1;F7=1;Glue=1 +1 ||| c b c d ||| F1bad2=1;F3=1;F5=1;F7=1;Glue=1 +1 ||| a d c b e ||| F1=1;F4=1;F5=1;F7=1;Glue=1 +1 ||| b b c d ||| F1bad1=1;F3=1;F5=1;F7=1;Glue=1 +1 ||| c d c b ||| F1bad2=1;F2=1;F5=1;F7=1;Glue=1 +1 ||| a b c d ||| F1=1;F3=1;F5=1;F7=1;Glue=1 +1 ||| b b c d ||| F1bad1=1;F5=1;F6=1;F7=1;Glue=3 +1 ||| b d c b ||| F1bad1=1;F2=1;F5=1;F7=1;Glue=1 +1 ||| a d c b ||| F1=1;F2=1;F5=1;F7=1;Glue=1 +1 ||| b b c d ||| F1bad1=1;F6=1;F7=1;F9=1;Glue=2 +1 ||| c c b d ||| F1bad2=1;F6=1;F7=1;F8=1;Glue=2 +1 ||| a b c d ||| F1=1;F6=1;F7=1;F9=1;Glue=2 +1 ||| a b c d ||| F1=1;F5=1;F6=1;F7=1;Glue=3 +1 ||| b c b d ||| F1bad1=1;F6=1;F7=1;F8=1;Glue=2 +1 ||| a c b d ||| F1=1;F6=1;F7=1;F8=1;Glue=2 diff --git a/tests/system_tests/controlled_synparse/input.txt b/tests/system_tests/controlled_synparse/input.txt new file mode 100644 index 00000000..2dbc09c8 --- /dev/null +++ b/tests/system_tests/controlled_synparse/input.txt @@ -0,0 +1,2 @@ +A B C D ||| a d c b +A B C D ||| a b c d diff --git a/tests/system_tests/controlled_synparse/scfg.biparse.gz b/tests/system_tests/controlled_synparse/scfg.biparse.gz Binary files differnew file mode 100644 index 00000000..aaf1c41d --- /dev/null +++ b/tests/system_tests/controlled_synparse/scfg.biparse.gz diff --git a/tests/system_tests/ftrans/cdec.ini b/tests/system_tests/ftrans/cdec.ini new file mode 100644 index 00000000..e8e004d6 --- /dev/null +++ b/tests/system_tests/ftrans/cdec.ini @@ -0,0 +1,3 @@ +formalism=fst +k_best=1000 +grammar=ftrans.pt diff --git a/tests/system_tests/ftrans/ftrans.pt b/tests/system_tests/ftrans/ftrans.pt new file mode 100644 index 00000000..7e8c6f59 --- /dev/null +++ b/tests/system_tests/ftrans/ftrans.pt @@ -0,0 +1,4 @@ +b ||| B ||| F1=1 OtherFeat=1 +c ||| C ||| F2=0.2 +b c ||| BC ||| F3=0.4 +c b ||| CB ||| F4=1 diff --git a/tests/system_tests/ftrans/gold.statistics b/tests/system_tests/ftrans/gold.statistics new file mode 100644 index 00000000..40cdfd4c --- /dev/null +++ b/tests/system_tests/ftrans/gold.statistics @@ -0,0 +1,5 @@ +-lm_nodes 13 +-lm_edges 16 +-lm_paths 4 +-lm_trans CB +-lm_viterbi 101 diff --git a/tests/system_tests/ftrans/gold.stdout b/tests/system_tests/ftrans/gold.stdout new file mode 100644 index 00000000..25c615d1 --- /dev/null +++ b/tests/system_tests/ftrans/gold.stdout @@ -0,0 +1,4 @@ +0 ||| CB ||| F4=1;Inv=1 +0 ||| BC ||| F3=0.4;Mono=1 +0 ||| B C ||| F1=1;OtherFeat=1;F2=0.2;Mono=1 +0 ||| C B ||| F1=1;OtherFeat=1;F2=0.2;Inv=1 diff --git a/tests/system_tests/ftrans/input.cfg b/tests/system_tests/ftrans/input.cfg new file mode 100644 index 00000000..b602c1cb --- /dev/null +++ b/tests/system_tests/ftrans/input.cfg @@ -0,0 +1,5 @@ +[S] ||| [A] +[A] ||| [B] [C] ||| Mono=1 +[A] ||| [C] [B] ||| Inv=1 +[B] ||| b +[C] ||| c diff --git a/tests/system_tests/ftrans/input.txt b/tests/system_tests/ftrans/input.txt new file mode 100644 index 00000000..aa37b2e7 --- /dev/null +++ b/tests/system_tests/ftrans/input.txt @@ -0,0 +1 @@ +{"rules":[1,"[B] ||| b ||| b",2,"[C] ||| c ||| c",3,"[A] ||| [B,1] [C,2] ||| [1] [2] ||| Mono=1",4,"[A] ||| [C,1] [B,2] ||| [1] [2] ||| Inv=1",5,"[S] ||| [A,1] ||| [1]"],"features":["Mono","Inv"],"edges":[{"tail":[],"feats":[],"rule":1}],"node":{"in_edges":[0],"cat":"B"},"edges":[{"tail":[],"feats":[],"rule":2}],"node":{"in_edges":[1],"cat":"C"},"edges":[{"tail":[0,1],"feats":[0,1],"rule":3},{"tail":[1,0],"feats":[1,1],"rule":4}],"node":{"in_edges":[2,3],"cat":"A"},"edges":[{"tail":[2],"feats":[],"rule":5}],"node":{"in_edges":[4],"cat":"S"}} diff --git a/tests/system_tests/ftrans/weights b/tests/system_tests/ftrans/weights new file mode 100644 index 00000000..89d3a6d3 --- /dev/null +++ b/tests/system_tests/ftrans/weights @@ -0,0 +1,5 @@ +F1 1 +F3 10 +F4 100 +Mono 2 +Inv 1 diff --git a/tests/system_tests/unsup-align/cdec.ini b/tests/system_tests/unsup-align/cdec.ini new file mode 100644 index 00000000..4016a201 --- /dev/null +++ b/tests/system_tests/unsup-align/cdec.ini @@ -0,0 +1,6 @@ +aligner=true +grammar=unsup-align.lex-grammar +cubepruning_pop_limit=1000000 +formalism=lexcrf +feature_function=RelativeSentencePosition +feature_function=MarkovJump diff --git a/tests/system_tests/unsup-align/gold.statistics b/tests/system_tests/unsup-align/gold.statistics new file mode 100644 index 00000000..afc49bfc --- /dev/null +++ b/tests/system_tests/unsup-align/gold.statistics @@ -0,0 +1,96 @@ +-lm_nodes 2 +-lm_edges 3 +-lm_paths 2 +-lm_trans blue +-lm_trans 0.4528 ++lm_nodes 2 ++lm_edges 3 ++lm_paths 2 ++lm_trans blue +constr_nodes 3 +constr_edges 3 +constr_paths 1 +-lm_nodes 2 +-lm_edges 4 +-lm_paths 3 +-lm_trans house +-lm_trans 0.673643 ++lm_nodes 2 ++lm_edges 4 ++lm_paths 3 ++lm_trans house +constr_nodes 3 +constr_edges 3 +constr_paths 1 +-lm_nodes 4 +-lm_edges 16 +-lm_paths 49 +-lm_trans the the +-lm_viterbi 1.42559 ++lm_nodes 7 ++lm_edges 20 ++lm_paths 49 ++lm_trans the house +constr_nodes 8 +constr_edges 11 +constr_paths 4 +-lm_nodes 4 +-lm_edges 12 +-lm_paths 25 +-lm_trans house house +-lm_viterbi 1.34729 ++lm_nodes 7 ++lm_edges 16 ++lm_paths 25 ++lm_trans house blue +constr_nodes 8 +constr_edges 11 +constr_paths 4 +-lm_nodes 4 +-lm_edges 14 +-lm_paths 36 +-lm_trans the the +-lm_viterbi 1.42559 ++lm_nodes 7 ++lm_edges 18 ++lm_paths 36 ++lm_trans the the +constr_nodes 8 +constr_edges 11 +constr_paths 4 +-lm_nodes 2 +-lm_edges 5 +-lm_paths 4 +-lm_trans the +-lm_trans 0.712796 ++lm_nodes 2 ++lm_edges 5 ++lm_paths 4 ++lm_trans the +constr_nodes 3 +constr_edges 3 +constr_paths 1 +-lm_nodes 4 +-lm_edges 14 +-lm_paths 36 +-lm_trans the the +-lm_viterbi 1.42559 ++lm_nodes 7 ++lm_edges 18 ++lm_paths 36 ++lm_trans the the +constr_nodes 8 +constr_edges 11 +constr_paths 4 +-lm_nodes 4 +-lm_edges 10 +-lm_paths 16 +-lm_trans thet thet +-lm_trans 0 ++lm_nodes 7 ++lm_edges 14 ++lm_paths 16 ++lm_trans end thet +constr_nodes 8 +constr_edges 11 +constr_paths 4 diff --git a/tests/system_tests/unsup-align/gold.stdout b/tests/system_tests/unsup-align/gold.stdout new file mode 100644 index 00000000..b94ab862 --- /dev/null +++ b/tests/system_tests/unsup-align/gold.stdout @@ -0,0 +1,8 @@ +0-0 +0-0 +0-0 1-1 +0-1 1-0 +0-0 1-1 +0-0 +0-0 1-1 +0-0 1-1 diff --git a/tests/system_tests/unsup-align/input.txt b/tests/system_tests/unsup-align/input.txt new file mode 100644 index 00000000..b97f81cf --- /dev/null +++ b/tests/system_tests/unsup-align/input.txt @@ -0,0 +1,8 @@ +bleue ||| blue +maison ||| house +la maison ||| the house +maison bleue ||| blue house +la fleur ||| the flower +la ||| the +la bouche ||| the mouth +le fin ||| thet end diff --git a/tests/system_tests/unsup-align/unsup-align.lex-grammar b/tests/system_tests/unsup-align/unsup-align.lex-grammar new file mode 100644 index 00000000..30b1a61a --- /dev/null +++ b/tests/system_tests/unsup-align/unsup-align.lex-grammar @@ -0,0 +1,17 @@ +bleue ||| blue ||| F1000001=1 +bleue ||| house ||| F1000002=1 +bouche ||| the ||| F1000003=1 +bouche ||| mouth ||| F1000004=1 +fin ||| thet ||| F1000005=1 +fin ||| end ||| F1000006=1 +fleur ||| the ||| F1000007=1 +fleur ||| flower ||| F1000008=1 +la ||| the ||| F1000009=1 +la ||| house ||| F1000010=1 +la ||| flower ||| F1000011=1 +la ||| mouth ||| F1000012=1 +le ||| thet ||| F1000013=1 +le ||| end ||| F1000014=1 +maison ||| house ||| F1000015=1 +maison ||| the ||| F1000016=1 +maison ||| blue ||| F1000017=1 diff --git a/tests/system_tests/unsup-align/weights b/tests/system_tests/unsup-align/weights new file mode 100644 index 00000000..7d9012c5 --- /dev/null +++ b/tests/system_tests/unsup-align/weights @@ -0,0 +1,19 @@ +RelativeSentencePosition -0.1 +MarkovJump -0.2 +F1000001 0.45280036748928199 +F1000002 -0.30603801277140658 +F1000003 0.0087200168696079348 +F1000004 0.25201383750998718 +F1000005 0 +F1000006 0 +F1000007 0.0087200038242073886 +F1000008 0.25201333505199081 +F1000009 0.7127956550520711 +F1000010 -0.4803381673023227 +F1000011 -0.42444387021307117 +F1000012 -0.42444295555037082 +F1000013 0 +F1000014 0 +F1000015 0.67364292245615709 +F1000016 -0.38087265233441997 +F1000017 -0.34457059973906817 diff --git a/tests/tools/compare-statistics.pl b/tests/tools/compare-statistics.pl new file mode 100755 index 00000000..3122c534 --- /dev/null +++ b/tests/tools/compare-statistics.pl @@ -0,0 +1,49 @@ +#!/usr/bin/perl -w +use strict; + +die "Usage: $0 gold.statistics < test.statistics\n" unless scalar @ARGV == 1; + +my $gold_file = shift @ARGV; +open G, "<$gold_file" or die "Can't read $gold_file: $!"; +my @gold_keys = (); +my @gold_vals = (); +while(<G>) { + chomp; + if (/^([^ ]+)\s*(.*)$/) { + push @gold_keys, $1; + push @gold_vals, $2; + } else { + die "Unexpected line in $gold_file: $_\n"; + } +} + +my $sc = 0; +my $MATCH = 0; +my $MISMATCH = 0; +while(<>) { + my $gold_key = $gold_keys[$sc]; + my $gold_val = $gold_vals[$sc]; + $sc++; + if (/^([^ ]+)\s*(.*)$/) { + my $test_key = $1; + my $test_val = $2; + if ($test_key ne $gold_key) { + die "Missing key in output! Expected '$gold_key' but got '$test_key'\n"; + } + if ($gold_val ne 'IGNORE') { + if ($gold_val eq $test_val) { $MATCH++; } else { + $MISMATCH++; + print STDERR "[VALUE FAILURE] key: '$gold_key'\n expected value: '$gold_val'\n actual value: '$test_val'\n"; + } + } + } else { + die "Unexpected line in test data: $_\n"; + } +} + +my $TOT = $MISMATCH + $MATCH; + +print "$MATCH $TOT\n"; + +if ($MISMATCH > 0) { exit 1; } else { exit 0; } + diff --git a/tests/tools/filter-stderr.pl b/tests/tools/filter-stderr.pl new file mode 100755 index 00000000..29a97298 --- /dev/null +++ b/tests/tools/filter-stderr.pl @@ -0,0 +1,30 @@ +#!/usr/bin/perl -w +use strict; + +my $REAL = '(\+|-)?[1-9][0-9]*(\.[0-9]*)?|(\+|-)?[1-9]\.[0-9]*e(\+|-)?[0-9]+'; + +while(<>) { +if (/-LM forest\s+\(nodes\/edges\): (\d+)\/(\d+)/) { print "-lm_nodes $1\n-lm_edges $2\n"; } +if (/-LM forest\s+\(paths\): (.+)$/) { print "-lm_paths $1\n"; } +# -LM Viterbi: -12.7893 +if (/-LM\s+Viterbi:\s+($REAL)/) { + print "-lm_viterbi $1\n"; +} elsif (/-LM\s+Viterbi:\s+(.+)$/) { + # -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries . + print "-lm_trans $1\n"; +} +#Constr. forest (nodes/edges): 111/305 +#Constr. forest (paths): 9899 +if (/Constr\. forest\s+\(nodes\/edges\): (\d+)\/(\d+)/) { print "constr_nodes $1\nconstr_edges $2\n"; } +if (/Constr\. forest\s+\(paths\): (.+)$/) { print "constr_paths $1\n"; } + +if (/\+LM forest\s+\(nodes\/edges\): (\d+)\/(\d+)/) { print "+lm_nodes $1\n+lm_edges $2\n"; } +if (/\+LM forest\s+\(paths\): (.+)$/) { print "+lm_paths $1\n"; } +if (/\+LM\s+Viterbi:\s+($REAL)/) { + print "+lm_viterbi $1\n"; +} elsif (/\+LM\s+Viterbi:\s+(.+)$/) { + # -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries . + print "+lm_trans $1\n"; +} + +} |