You are on page 1of 645

my $thing = TAP::Whatever->new();

$thing->callback( event => sub {


# do something interesting
} );
=head1 DESCRIPTION
C<TAP::Base> provides callback management.
=head1 METHODS
=head2 Class Methods
=cut
# Prepare the callback bookkeeping for a newly created object.
#
#   $arg_for     - hashref of constructor arguments. A 'callbacks' entry
#                  (hashref of event name => callback) is removed from it
#                  and every pair is installed through callback().
#   $ok_callback - arrayref of the event names this object accepts.
#
# Returns $self.
sub _initialize {
    my $self = shift;
    my ( $arg_for, $ok_callback ) = @_;

    # Keep the permitted event names as a lookup table.
    my %permitted = map { ( $_ => 1 ) } @$ok_callback;
    $self->{ok_callbacks} = \%permitted;

    my $requested = delete $arg_for->{callbacks};
    if ($requested) {
        while ( my ( $event, $callback ) = each %$requested ) {
            $self->callback( $event, $callback );
        }
    }

    return $self;
}
=head3 C<callback>
Install a callback for a named event.
=cut
# Register $callback to be run when $event fires.
#
# Croaks if this object accepts no callbacks at all, or if $event is not
# one of the permitted event names. Several callbacks may be registered
# for the same event; they are stored in registration order.
#
# Returns nothing.
sub callback {
    my $self = shift;
    my ( $event, $callback ) = @_;

    my $permitted = $self->{ok_callbacks};

    if ( !%$permitted ) {
        $self->_croak('No callbacks may be installed');
    }

    if ( !exists $permitted->{$event} ) {
        $self->_croak( "Callback $event is not supported. Valid callbacks are "
              . join( ', ', sort keys %$permitted ) );
    }

    push @{ $self->{code_for}{$event} }, $callback;

    return;
}
# True if at least one event currently has callbacks registered.
sub _has_callbacks {
    my ($self) = @_;
    my $event_count = keys %{ $self->{code_for} };
    return $event_count != 0;
}
# Look up the arrayref of callbacks registered for $event.
# Returns undef when nothing is registered under that name.
sub _callback_for {
    my $self  = shift;
    my $event = shift;
    return $self->{code_for}->{$event};
}
# Fire every callback registered for $event, handing each one the
# remaining arguments.
#
# Returns the concatenated return values of the callbacks, or nothing if
# no callback is registered for the event.
sub _make_callback {
    my ( $self, $event ) = ( shift, shift );

    my $handlers = $self->_callback_for($event);
    return unless defined $handlers;

    # What is left in @_ is passed through untouched.
    my @results;
    push @results, $_->(@_) for @{$handlers};
    return @results;
}
=head3 C<get_time>
Return the current time using Time::HiRes if available.
=cut
# Current time, as reported by whichever time() is in scope for this package.
sub get_time {
    my $now = time();
    return $now;
}
=head3 C<time_is_hires>
Return true if the time returned by get_time is high resolution (i.e. if
Time::HiRes is available).
=cut
# Returns the value of the GOT_TIME_HIRES constant.
# NOTE(review): that constant is defined outside this excerpt; presumably it
# is true when Time::HiRes was loaded successfully -- confirm at its definition.
sub time_is_hires { return GOT_TIME_HIRES }
1;
package TAP::Harness;
use strict;
use Carp;
use File::Spec;
use File::Path;
use IO::Handle;
use TAP::Base;
use vars qw($VERSION @ISA);
@ISA = qw(TAP::Base);
=head1 NAME
TAP::Harness - Run test scripts with statistics
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';

# Record in the environment that the harness is loaded, and its version.
@ENV{qw(HARNESS_ACTIVE HARNESS_VERSION)} = ( 1, $VERSION );

END {

    # For VMS.
    delete @ENV{qw(HARNESS_ACTIVE HARNESS_VERSION)};
}
=head1 DESCRIPTION
This is a simple test harness which allows tests to be run and results
automatically aggregated and output to STDOUT.
=head1 SYNOPSIS
use TAP::Harness;
my $harness = TAP::Harness->new( \%args );
$harness->runtests(@tests);
=cut
# Both tables are declared ahead of the BEGIN block that follows, which
# fills them in at compile time.
# %VALIDATION_FOR maps a constructor argument name to its validator code ref.
my %VALIDATION_FOR;
# @FORMATTER_ARGS names the arguments/methods that are handed to the formatter.
my @FORMATTER_ARGS;
# Combined getter/setter for the object's pending error message.
# With an argument it stores and returns that value; without one it returns
# whatever is currently stored.
sub _error {
    my ( $self, @new ) = @_;

    if (@new) {
        return $self->{error} = $new[0];
    }

    return $self->{error};
}
# Compile-time setup: fill in the argument tables and generate the accessor
# and formatter-delegation methods.
BEGIN {

    # Arguments (and methods) that are handled by the formatter object.
    @FORMATTER_ARGS = qw(
      directives verbosity timer failures comments errors stdout color
      show_count normalize
    );

    # Most arguments are accepted unchanged: the validator simply returns
    # the supplied value.
    my $accept_as_is = sub { shift; shift };
    my @unchecked    = qw(
      switches exec merge
      aggregator_class formatter_class multiplexer_class
      parser_class scheduler_class
      formatter jobs test_args ignore_exit rules
    );

    %VALIDATION_FOR = (

        # Library paths become -I switches.
        lib => sub {
            my ( $self, $libs ) = @_;
            $libs = [$libs] unless 'ARRAY' eq ref $libs;
            return [ map {"-I$_"} @$libs ];
        },
        ( map { ( $_ => $accept_as_is ) } @unchecked ),
    );

    # These two accessors always hold an array ref.
    my %holds_list = ( lib => 1, switches => 1 );

    for my $method ( sort keys %VALIDATION_FOR ) {
        no strict 'refs';
        *{$method} = $holds_list{$method}
          ? sub {
            my ( $self, @given ) = @_;

            # Getter: list in list context, array ref otherwise.
            if ( !@given ) {
                my $current = ( $self->{$method} ||= [] );
                return wantarray ? @{$current} : $current;
            }

            $self->_croak("Too many arguments to method '$method'")
              if @given > 1;

            # Setter: a lone scalar is wrapped in an array ref.
            my $value = $given[0];
            $self->{$method} = ref $value ? $value : [$value];
            return $self;
          }
          : sub {
            my ( $self, @given ) = @_;
            return $self->{$method} unless @given;
            return $self->{$method} = $given[0];
          };
    }

    # Formatter arguments are plain delegations to the formatter object.
    for my $method (@FORMATTER_ARGS) {
        no strict 'refs';
        *{$method} = sub {
            return shift->formatter->$method(@_);
        };
    }
}
##############################################################################
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my %args = (
verbosity => 1,
lib => [ 'lib', 'blib/lib', 'blib/arch' ],
)
my $harness = TAP::Harness->new( \%args );
The constructor returns a new C<TAP::Harness> object. It accepts an
optional hashref whose allowed keys are:
=over 4
=item * C<verbosity>
Set the verbosity level:
1 verbose Print individual test results to STDOUT.
0 normal
-1 quiet Suppress some test output (mostly failures
while tests are running).
-2 really quiet Suppress everything but the tests summary.
-3 silent Suppress everything.
=item * C<timer>
Append run time for each test to output. Uses L<Time::HiRes> if
available.
=item * C<failures>
Show test failures (this is a no-op if C<verbose> is selected).
=item * C<comments>
Show test comments (this is a no-op if C<verbose> is selected).
=item * C<show_count>
Update the running test count during testing.
=item * C<normalize>
Set to a true value to normalize the TAP that is emitted in verbose modes.
=item * C<lib>
Accepts a scalar value or array ref of scalar values indicating which
library paths should be included if Perl tests are executed. Naturally,
this only makes sense in the context of tests written in Perl.
=item * C<switches>
Accepts a scalar value or array ref of scalar values indicating which
switches should be included if Perl tests are executed. Naturally, this
only makes sense in the context of tests written in Perl.
=item * C<test_args>
A reference to an C<@ARGV> style array of arguments to be passed to each
test program.
=item * C<color>
Attempt to produce color output.
=item * C<exec>
Typically, Perl tests are run through this. However, anything which
spits out TAP is fine. You can use this argument to specify the name of
the program (and optional switches) to run your tests with:
exec => ['/usr/bin/ruby', '-w']
You can also pass a subroutine reference in order to determine and
return the proper program to run based on a given test script. The
subroutine reference should expect the TAP::Harness object itself as the
first argument, and the file name as the second argument. It should
return an array reference containing the command to be run and including
the test file name. It can also simply return C<undef>, in which case
TAP::Harness will fall back on executing the test script in Perl:
exec => sub {
my ( $harness, $test_file ) = @_;
# Let Perl tests run.
return undef if $test_file =~ /[.]t$/;
return [ qw( /usr/bin/ruby -w ), $test_file ]
if $test_file =~ /[.]rb$/;
}
If the subroutine returns a scalar with a newline or a filehandle, it
will be interpreted as raw TAP or as a TAP stream, respectively.
=item * C<merge>
If C<merge> is true the harness will create parsers that merge STDOUT
and STDERR together for any processes they start.
=item * C<aggregator_class>
The name of the class to use to aggregate test results. The default is
L<TAP::Parser::Aggregator>.
=item * C<formatter_class>
The name of the class to use to format output. The default is
L<TAP::Formatter::Console>, or L<TAP::Formatter::File> if the output
isn't a TTY.
=item * C<multiplexer_class>
The name of the class to use to multiplex tests during parallel testing.
The default is L<TAP::Parser::Multiplexer>.
=item * C<parser_class>
The name of the class to use to parse TAP. The default is
L<TAP::Parser>.
=item * C<scheduler_class>
The name of the class to use to schedule test execution. The default is
L<TAP::Parser::Scheduler>.
=item * C<formatter>
If set C<formatter> must be an object that is capable of formatting the
TAP output. See L<TAP::Formatter::Console> for an example.
=item * C<errors>
If parse errors are found in the TAP output, a note of this will be
made in the summary report. To see all of the parse errors, set this
argument to true:
errors => 1
=item * C<directives>
If set to a true value, only test results with directives will be
displayed. This overrides other settings such as C<verbose> or
C<failures>.
=item * C<ignore_exit>
If set to a true value instruct C<TAP::Parser> to ignore exit and wait
status from test scripts.
=item * C<jobs>
The maximum number of parallel tests to run at any time. Which tests
can be run in parallel is controlled by C<rules>. The default is to
run only one test at a time.
=item * C<rules>
A reference to a hash of rules that control which tests may be
executed in parallel. This is an experimental feature and the
interface may change.
$harness->rules(
{ par => [
{ seq => '../ext/DB_File/t/*' },
{ seq => '../ext/IO_Compress_Zlib/t/*' },
{ seq => '../lib/CPANPLUS/*' },
{ seq => '../lib/ExtUtils/t/*' },
'*'
]
}
);
=item * C<stdout>
A filehandle for catching standard output.
=back
Any keys for which the value is C<undef> will be ignored.
=cut
# new supplied by TAP::Base
# The enclosing bare block gives _initialize two private, file-invisible
# tables: the callback names the harness accepts and the default class names.
{
# Event names handed to the parent class's _initialize as the permitted
# callbacks.
my @legal_callback = qw(
parser_args
made_parser
before_runtests
after_runtests
after_test
);
# Fallback class name for each *_class accessor that has no value after the
# constructor arguments have been processed.
my %default_class = (
aggregator_class => 'TAP::Parser::Aggregator',
formatter_class => 'TAP::Formatter::Console',
multiplexer_class => 'TAP::Parser::Multiplexer',
parser_class => 'TAP::Parser',
scheduler_class => 'TAP::Parser::Scheduler',
);
# Initialise a new harness from the optional hashref of constructor
# arguments. Croaks on a validation error or on any argument name that is
# left over at the end. Returns $self.
sub _initialize {
my ( $self, $arg_for ) = @_;
$arg_for ||= {};
$self->SUPER::_initialize( $arg_for, \@legal_callback );
my %arg_for = %$arg_for; # force a shallow copy
# Consume every known argument from the copy: validate it, then store the
# validated value through the accessor of the same name. Undefined values
# are removed from the copy but otherwise ignored.
for my $name ( sort keys %VALIDATION_FOR ) {
my $property = delete $arg_for{$name};
if ( defined $property ) {
my $validate = $VALIDATION_FOR{$name};
my $value = $self->$validate($property);
if ( $self->_error ) {
$self->_croak;
}
$self->$name($value);
}
}
$self->jobs(1) unless defined $self->jobs;
# For the rest of this call only, make TAP::Formatter::File the default
# formatter class unless the output handle (the 'stdout' argument, else
# STDOUT) is a terminal and HARNESS_NOTTY is not set.
local $default_class{formatter_class} = 'TAP::Formatter::File'
unless -t ( $arg_for{stdout} || \*STDOUT ) && !$ENV{HARNESS_NOTTY};
# Apply the default to every class accessor that has no true value yet.
while ( my ( $attr, $class ) = each %default_class ) {
$self->$attr( $self->$attr() || $class );
}
# Build a formatter only when the caller did not supply one; the formatter
# arguments are taken out of the copy only in that case.
unless ( $self->formatter ) {
# This is a little bodge to preserve legacy behaviour. It's
# pretty horrible that we know which args are destined for
# the formatter.
my %formatter_args = ( jobs => $self->jobs );
for my $name (@FORMATTER_ARGS) {
if ( defined( my $property = delete $arg_for{$name} ) ) {
$formatter_args{$name} = $property;
}
}
$self->formatter(
$self->_construct( $self->formatter_class, \%formatter_args )
);
}
# Anything still present in the copy was not recognised.
if ( my @props = sort keys %arg_for ) {
$self->_croak("Unknown arguments to TAP::Harness::new (@props)");
}
return $self;
}
}
##############################################################################
=head2 Instance Methods
=head3 C<runtests>
$harness->runtests(@tests);
Accepts an array of C<@tests> to be run. This should generally be the
names of test files, but this is not required. Each element in C<@tests>
will be passed to C<TAP::Parser::new()> as a C<source>. See
L<TAP::Parser> for more information.
It is possible to provide aliases that will be displayed in place of the
test name by supplying the test as a reference to an array containing
C<< [ $test, $alias ] >>:
$harness->runtests( [ 't/foo.t', 'Foo Once' ],
[ 't/foo.t', 'Foo Twice' ] );
Normally it is an error to attempt to run the same test twice. Aliases
allow you to overcome this limitation by giving each run of the test a
unique name.
Tests will be run in the order found.
If the environment variable C<PERL_TEST_HARNESS_DUMP_TAP> is defined it
should name a directory into which a copy of the raw TAP for each test
will be written. TAP is written to files named for each test.
Subdirectories will be created as needed.
Returns a L<TAP::Parser::Aggregator> containing the test results.
=cut
# Run @tests from start to finish: create an aggregator, fire the
# before_runtests callback, run and time the tests, print the summary and
# fire the after_runtests callback.
#
# Returns the aggregator holding the results.
sub runtests {
    my $self  = shift;
    my @tests = @_;

    my $results = $self->_construct( $self->aggregator_class );

    $self->_make_callback( 'before_runtests', $results );

    # The aggregator's start/stop calls bracket the actual test run.
    $results->start;
    $self->aggregate_tests( $results, @tests );
    $results->stop;

    $self->summary($results);
    $self->_make_callback( 'after_runtests', $results );

    return $results;
}
=head3 C<summary>
Output the summary for a TAP::Parser::Aggregator.
=cut
# Ask the formatter to print the summary for $aggregate; returns whatever
# the formatter's summary method returns.
sub summary {
    my $self = shift;
    my ($aggregate) = @_;
    return $self->formatter->summary($aggregate);
}
# Post-processing for one finished test: fire the after_test callback, then
# file the parser in the aggregator under the job's description.
sub _after_test {
    my $self = shift;
    my ( $aggregate, $job, $parser ) = @_;

    $self->_make_callback( 'after_test', $job->as_array_ref, $parser );

    return $aggregate->add( $job->description, $parser );
}
# Abort the whole run by dying, including the bail-out result's explanation
# in the message when it has one.
sub _bailout {
    my $self   = shift;
    my $result = shift;

    my $why  = $result->explanation;
    my $tail = $why ? ": $why\n" : ".\n";

    die "FAILED--Further testing stopped" . $tail;
}
# Run the scheduler's jobs through a multiplexer, with at most jobs()
# parsers attached at any time, feeding every result to its formatter
# session and every finished parser to the aggregator.
#
# Returns nothing. Dies (via _bailout) when a test bails out.
sub _aggregate_parallel {
    my ( $self, $aggregate, $scheduler ) = @_;

    my $max_jobs    = $self->jobs;
    my $multiplexer = $self->_construct( $self->multiplexer_class );

    while (1) {

        # Keep multiplexer topped up
        FILL:
        while ( $multiplexer->parsers < $max_jobs ) {
            my $job = $scheduler->get_job;

            # If we hit a spinner stop filling and start running.
            last FILL if !defined $job || $job->is_spinner;

            my ( $parser, $session ) = $self->make_parser($job);
            $multiplexer->add( $parser, [ $session, $job ] );
        }

        # Stop once the multiplexer has nothing more to hand out.
        my @next = $multiplexer->next;
        last unless @next;

        my ( $parser, $stash, $result ) = @next;
        my ( $session, $job ) = @$stash;

        if ( defined $result ) {
            $session->result($result);
            $self->_bailout($result) if $result->is_bailout;
            next;
        }

        # End of parser. Automatically removed from the mux.
        $self->finish_parser( $parser, $session );
        $self->_after_test( $aggregate, $job, $parser );
        $job->finish;
    }

    return;
}
# Run the scheduler's jobs one after another, feeding every result to the
# job's formatter session and every finished parser to the aggregator.
#
# Returns nothing. Dies (via _bailout) when a test bails out.
sub _aggregate_single {
    my $self = shift;
    my ( $aggregate, $scheduler ) = @_;

    while ( my $job = $scheduler->get_job ) {

        # Spinner jobs are skipped; there is nothing to run for them.
        next if $job->is_spinner;

        my ( $parser, $session ) = $self->make_parser($job);

        RESULT:
        while (1) {
            my $result = $parser->next;
            last RESULT unless defined $result;

            $session->result($result);
            next RESULT unless $result->is_bailout;

            # Keep reading until input is exhausted in the hope
            # of allowing any pending diagnostics to show up.
            1 while $parser->next;
            $self->_bailout($result);
        }

        $self->finish_parser( $parser, $session );
        $self->_after_test( $aggregate, $job, $parser );
        $job->finish;
    }

    return;
}
=head3 C<aggregate_tests>
$harness->aggregate_tests( $aggregate, @tests );
Run the named tests and display a summary of result. Tests will be run
in the order found.
Test results will be added to the supplied L<TAP::Parser::Aggregator>.
C<aggregate_tests> may be called multiple times to run several sets of
tests. Multiple C<TAP::Harness> instances may be used to pass results
to a single aggregator so that different parts of a complex test suite
may be run using different C<TAP::Harness> settings. This is useful, for
example, in the case where some tests should run in parallel but others
are unsuitable for parallel execution.
my $formatter = TAP::Formatter::Console->new;
my $ser_harness = TAP::Harness->new( { formatter => $formatter } );
my $par_harness = TAP::Harness->new(
{ formatter => $formatter,
jobs => 9
}
);
my $aggregator = TAP::Parser::Aggregator->new;
$aggregator->start();
$ser_harness->aggregate_tests( $aggregator, @ser_tests );
$par_harness->aggregate_tests( $aggregator, @par_tests );
$aggregator->stop();
$formatter->summary($aggregator);
Note that for simpler testing requirements it will often be possible to
replace the above code with a single call to C<runtests>.
Each element of the C<@tests> array is either:
=over
=item * the file name of a test script to run
=item * a reference to a [ file name, display name ] array
=back
When you supply a separate display name it becomes possible to run a
test more than once; the display name is effectively the alias by which
the test is known inside the harness. The harness doesn't care if it
runs the same script more than once when each invocation uses a
different name.
=cut
# Run @tests and add their results to $aggregate.
#
#   $aggregate - aggregator object that collects the results
#   @tests     - test names, or [ name, description ] array refs
#
# A scheduler is built for the tests, the formatter is given the test
# descriptions, and the tests are then run either in parallel (when jobs()
# is greater than one) or one at a time.
#
# Returns nothing.
sub aggregate_tests {
    my ( $self, $aggregate, @tests ) = @_;

    my $scheduler = $self->make_scheduler(@tests);

    # #12458
    # HARNESS_IS_VERBOSE is set only while this call is running.
    local $ENV{HARNESS_IS_VERBOSE} = 1
      if $self->formatter->verbosity > 0;

    # Formatter gets only names.
    $self->formatter->prepare( map { $_->description } $scheduler->get_all );

    # jobs() is read here, once, at the point where the decision is made.
    if ( $self->jobs > 1 ) {
        $self->_aggregate_parallel( $aggregate, $scheduler );
    }
    else {
        $self->_aggregate_single( $aggregate, $scheduler );
    }

    return;
}
# Normalise a list of tests to [ name, description ] pairs.
#
# A plain scalar, or an array ref with a single element, becomes a pair in
# which the name doubles as the description. Any other array ref is passed
# through unchanged.
sub _add_descriptions {
    my $self = shift;

    my @described;
    for my $test (@_) {
        my $spec = ref($test) eq 'ARRAY' ? $test : [$test];
        push @described,
          @$spec == 1 ? [ $spec->[0], $spec->[0] ] : $spec;
    }

    return @described;
}
=head3 C<make_scheduler>
Called by the harness when it needs to create a
L<TAP::Parser::Scheduler>. Override in a subclass to provide an
alternative scheduler. C<make_scheduler> is passed the list of tests
that was passed to C<aggregate_tests>.
=cut
# Build the scheduler object for @tests using the configured
# scheduler_class. The tests are first normalised to
# [ name, description ] pairs, and the harness's rules are passed along.
sub make_scheduler {
    my $self  = shift;
    my @tests = @_;

    my $class     = $self->scheduler_class;
    my @described = $self->_add_descriptions(@tests);

    return $self->_construct(
        $class,
        tests => \@described,
        rules => $self->rules
    );
}
=head3 C<jobs>
Gets or sets the number of concurrent test runs the harness is
handling. By default, this value is 1 -- for parallel testing, this
should be set higher.
=cut
##############################################################################
=head1 SUBCLASSING
C<TAP::Harness> is designed to be (mostly) easy to subclass. If you
don't like how a particular feature functions, just override the
desired methods.
=head2 Methods
TODO: This is out of date
The following methods are ones you may wish to override if you want to
subclass C<TAP::Harness>.
=head3 C<summary>
$harness->summary( \%args );
C<summary> prints the summary report after all tests are run. The
argument is a hashref with the following keys:
=over 4
=item * C<start>
This is created with C<< Benchmark->new >> and is the time the tests
started. You can print a useful summary time, if desired, with:
$self->output(
timestr( timediff( Benchmark->new, $start_time ), 'nop' ) );
=item * C<tests>
This is an array reference of all test names. To get the L<TAP::Parser>
object for individual tests:
my $aggregate = $args->{aggregate};
my $tests = $args->{tests};
for my $name ( @$tests ) {
my ($parser) = $aggregate->parsers($name);
... do something with $parser
}
This is a bit clunky and will be cleaned up in a later release.
=back
=cut
# Assemble the hashref of parser constructor arguments for one job.
#
# The result always has 'switches' (lib switches followed by the other
# switches), 'spool', 'merge' and 'ignore_exit'. It has 'test_args' when
# they are defined, and describes what to run through 'exec' and/or
# 'source' depending on the harness's exec setting.
sub _get_parser_args {
    my $self = shift;
    my ($job) = @_;

    my $test_prog = $job->filename;

    my @switches;
    if ( $self->lib ) {
        @switches = $self->lib;
    }
    if ( $self->switches ) {
        push @switches, $self->switches;
    }

    my %args = ( switches => \@switches );
    $args{spool}       = $self->_open_spool($test_prog);
    $args{merge}       = $self->merge;
    $args{ignore_exit} = $self->ignore_exit;

    my $exec = $self->exec;
    if ( !$exec ) {

        # No exec configured: the test file itself is the source.
        $args{source} = $test_prog;
    }
    else {

        # A code ref decides per test; an array ref is a command prefix.
        my $command
          = ref $exec eq 'CODE'
          ? $exec->( $self, $test_prog )
          : [ @$exec, $test_prog ];

        if ( !defined $command ) {

            # The code ref declined: run the test file as the source. The
            # (undefined) exec entry is kept in the result.
            $args{exec}   = undef;
            $args{source} = $test_prog;
        }
        elsif ( ( ref($command) || "" ) ne "ARRAY" ) {

            # Anything other than a command array is handed on as the source.
            $args{source} = $command;
        }
        else {
            $args{exec} = $command;
        }
    }

    my $test_args = $self->test_args;
    if ( defined $test_args ) {
        $args{test_args} = $test_args;
    }

    return \%args;
}
=head3 C<make_parser>
Make a new parser and display formatter session. Typically used and/or
overridden in subclasses.
my ( $parser, $session ) = $harness->make_parser($job);
=cut
# Create the parser and the formatter session for one job.
#
# The parser_args callback fires before the parser is constructed and the
# made_parser callback fires afterwards.
#
# Returns the list ( $parser, $session ).
sub make_parser {
    my $self = shift;
    my ($job) = @_;

    my $parser_args = $self->_get_parser_args($job);
    $self->_make_callback( 'parser_args', $parser_args, $job->as_array_ref );

    my $parser = $self->_construct( $self->parser_class, $parser_args );
    $self->_make_callback( 'made_parser', $parser, $job->as_array_ref );

    my $session = $self->formatter->open_test( $job->description, $parser );

    return ( $parser, $session );
}
=head3 C<finish_parser>
Terminate use of a parser. Typically used and/or overridden in
subclasses. The parser isn't destroyed as a result of this.
=cut
# Wind down a parser that has produced all of its results: close the
# formatter session and the parser's spool handle.
#
# Returns the parser.
sub finish_parser {
    my $self = shift;
    my ( $parser, $session ) = @_;

    $session->close_test;
    $self->_close_spool($parser);

    return $parser;
}
# Open a spool file for the raw TAP of $test.
#
# When the PERL_TEST_HARNESS_DUMP_TAP environment variable names a
# directory, a file named after the test is created underneath it (parent
# directories are created as needed) and a write handle to it is returned.
# Otherwise nothing is returned.
#
# Croaks if the directory cannot be created or the file cannot be opened.
sub _open_spool {
    my ( $self, $test ) = @_;

    my $spool_dir = $ENV{PERL_TEST_HARNESS_DUMP_TAP};
    return unless $spool_dir;

    my $spool = File::Spec->catfile( $spool_dir, $test );

    # Make the directory
    my ( $vol, $dir, undef ) = File::Spec->splitpath($spool);
    my $path = File::Spec->catpath( $vol, $dir, '' );
    eval { mkpath($path) };
    $self->_croak($@) if $@;

    # Three-argument open: the path is used exactly as given, so whitespace
    # or mode characters in a test name cannot change what gets opened.
    my $spool_handle = IO::Handle->new;
    open( $spool_handle, '>', $spool )
      or $self->_croak(" Can't write $spool ( $! ) ");

    return $spool_handle;
}
# Close the spool handle attached to $parser, if it has one.
# Croaks when closing fails. Returns nothing.
sub _close_spool {
    my ( $self, $parser ) = @_;

    # delete_spool hands over the handle; without one there is nothing to do.
    my $spool_handle = $parser->delete_spool;
    return unless $spool_handle;

    close($spool_handle)
      or $self->_croak(" Error closing TAP spool file( $! ) \n ");

    return;
}
# Raise an exception through the parent class's _croak. When no (true)
# message is supplied, the stored error from _error is used instead.
sub _croak {
    my $self    = shift;
    my $message = shift;

    $message ||= $self->_error;

    $self->SUPER::_croak($message);

    return;
}
=head1 REPLACING
If you like the C<prove> utility and L<TAP::Parser> but you want your
own harness, all you need to do is write one and provide C<new> and
C<runtests> methods. Then you can use the C<prove> utility like so:
prove --harness My::Test::Harness
Note that while C<prove> accepts a list of tests (or things to be
tested), C<new> has a fairly rich set of arguments. You'll probably want
to read over this code carefully to see how all of them are being used.
=head1 SEE ALSO
L<Test::Harness>
=cut
1;
# vim:ts=4:sw=4:et:sta
package TAP::Object;
use strict;
use vars qw($VERSION);
=head1 NAME
TAP::Object - Base class that provides common functionality to all C<TAP::*>
modules
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
package TAP::Whatever;
use strict;
use vars qw(@ISA);
use TAP::Object;
@ISA = qw(TAP::Object);
# new() implementation by TAP::Object
sub _initialize {
my ( $self, @args) = @_;
# initialize your object
return $self;
}
# ... later ...
my $obj = TAP::Whatever->new(@args);
=head1 DESCRIPTION
C<TAP::Object> provides a default constructor and exception model for all
C<TAP::*> classes. Exceptions are raised using L<Carp>.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
Create a new object. Any arguments passed to C<new> will be passed on to the
L</_initialize> method. Returns a new object.
=cut
# Generic constructor: bless an empty hash into the class and let
# _initialize finish the job. Returns whatever _initialize returns.
sub new {
    my $class = shift;
    return bless( {}, $class )->_initialize(@_);
}
=head2 Instance Methods
=head3 C<_initialize>
Initializes a new object. This method is a stub by default, you should override
it as appropriate.
I<Note:> L</new> expects you to return C<$self> or raise an exception. See
L</_croak>, and L<Carp>.
=cut
# Default initialiser: ignores its arguments and returns the object as is.
sub _initialize {
    my ($self) = @_;
    return $self;
}
=head3 C<_croak>
Raise an exception using C<croak> from L<Carp>, eg:
$self->_croak( 'why me?', 'aaarrgh!' );
May also be called as a I<class> method.
$class->_croak( 'this works too' );
=cut
# Raise an exception with Carp::croak, using everything after the invocant
# as the message. Carp is loaded on demand. Works for objects and classes.
sub _croak {
    my ( $proto, @message ) = @_;

    require Carp;
    Carp::croak(@message);

    return;
}
=head3 C<_construct>
Create a new instance of the specified class.
=cut
# Create a new instance of $class, loading the class first if necessary.
#
#   $class - name of the class to instantiate; it must look like a plain
#            module name (word characters separated by '::')
#   @args  - passed straight through to $class->new
#
# Croaks with "Bad module name ..." if the name is not a plain module name,
# and with "Can't load <class>: <reason>" if the class has no 'new' method
# and cannot be loaded. Returns the new object.
sub _construct {
    my ( $self, $class, @args ) = @_;

    $class = '' unless defined $class;

    # \A and \z anchor the whole string, so a name with a trailing newline
    # is rejected as well.
    $self->_croak("Bad module name $class")
      unless $class =~ /\A \w+ (?: :: \w+ )* \z/x;

    unless ( $class->can('new') ) {

        # String eval is safe here because the name has been validated.
        # The error is captured inside the localised scope and reported
        # outside it, so restoring $@ cannot discard the message.
        my $load_error;
        {
            local $@;
            $load_error = $@ || 'unknown error'
              unless eval "require $class; 1";
        }
        $self->_croak("Can't load $class: $load_error")
          if defined $load_error;
    }

    return $class->new(@args);
}
=head3 C<mk_methods>
Create simple getter/setters.
__PACKAGE__->mk_methods(@method_names);
=cut
# Install a simple getter/setter in $class for each name in @methods.
#
# Each generated method stores its first argument (when one is given) under
# a hash key named after the method, and returns the stored value.
sub mk_methods {
    my ( $class, @methods ) = @_;

    for my $method_name (@methods) {
        no strict 'refs';
        *{"${class}::$method_name"} = sub {
            my ( $self, @new_value ) = @_;
            $self->{$method_name} = $new_value[0] if @new_value;
            return $self->{$method_name};
        };
    }
}
1;
package TAP::Parser;
use strict;
use vars qw($VERSION @ISA);
use TAP::Base ();
use TAP::Parser::Grammar ();
use TAP::Parser::Result ();
use TAP::Parser::ResultFactory ();
use TAP::Parser::Source ();
use TAP::Parser::Source::Perl ();
use TAP::Parser::Iterator ();
use TAP::Parser::IteratorFactory ();
use Carp qw( confess );
=head1 NAME
TAP::Parser - Parse L<TAP|Test::Harness::TAP> output
=head1 VERSION
Version 3.17
=cut
# Module version.
$VERSION = '3.17';
# Initial value of the parser's 'version' state (see %initialize further down
# in this package).
my $DEFAULT_TAP_VERSION = 12;
# NOTE(review): presumably the newest TAP version this parser understands;
# here it is only exported through the TAP_VERSION environment variable.
my $MAX_TAP_VERSION = 13;
$ENV{TAP_VERSION} = $MAX_TAP_VERSION;
# Remove the variable again when the program ends.
END {
# For VMS.
delete $ENV{TAP_VERSION};
}
BEGIN {    # making accessors
    @ISA = qw(TAP::Base);

    # One plain getter/setter is generated for each of these names.
    my @accessor_names = qw(
      _stream
      _spool
      exec
      exit
      is_good_plan
      plan
      tests_planned
      tests_run
      wait
      version
      in_todo
      start_time
      end_time
      skip_all
      source_class
      perl_source_class
      grammar_class
      iterator_factory_class
      result_factory_class
    );

    __PACKAGE__->mk_methods(@accessor_names);
}    # done making accessors
=head1 SYNOPSIS
use TAP::Parser;
my $parser = TAP::Parser->new( { source => $source } );
while ( my $result = $parser->next ) {
print $result->as_string;
}
=head1 DESCRIPTION
C<TAP::Parser> is designed to produce a proper parse of TAP output. For
an example of how to run tests through this module, see the simple
harnesses in C<examples/>.
There's a wiki dedicated to the Test Anything Protocol:
L<http://testanything.org>
It includes the TAP::Parser Cookbook:
L<http://testanything.org/wiki/index.php/TAP::Parser_Cookbook>
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $parser = TAP::Parser->new(\%args);
Returns a new C<TAP::Parser> object.
The arguments should be a hashref with I<one> of the following keys:
=over 4
=item * C<source>
This is the preferred method of passing arguments to the constructor. To
determine how to handle the source, the following steps are taken.
If the source contains a newline, it's assumed to be a string of raw TAP
output.
If the source is a reference, it's assumed to be something to pass to
the L<TAP::Parser::Iterator::Stream> constructor. This is used
internally and you should not use it.
Otherwise, the parser does a C<-e> check to see if the source exists. If so,
it attempts to execute the source and read the output as a stream. This is by
far the preferred method of using the parser.
foreach my $file ( @test_files ) {
my $parser = TAP::Parser->new( { source => $file } );
# do stuff with the parser
}
=item * C<tap>
The value should be the complete TAP output.
=item * C<exec>
If passed an array reference, will attempt to create the iterator by
passing a L<TAP::Parser::Source> object to
L<TAP::Parser::Iterator::Source>, using the array reference strings as
the command arguments to L<IPC::Open3::open3|IPC::Open3>:
exec => [ '/usr/bin/ruby', 't/my_test.rb' ]
Note that C<source> and C<exec> are mutually exclusive.
=back
The following keys are optional.
=over 4
=item * C<callbacks>
If present, each callback corresponding to a given result type will be called
with the result as the argument if the C<run> method is used:
my %callbacks = (
test => \&test_callback,
plan => \&plan_callback,
comment => \&comment_callback,
bailout => \&bailout_callback,
unknown => \&unknown_callback,
);
my $aggregator = TAP::Parser::Aggregator->new;
foreach my $file ( @test_files ) {
my $parser = TAP::Parser->new(
{
source => $file,
callbacks => \%callbacks,
}
);
$parser->run;
$aggregator->add( $file, $parser );
}
=item * C<switches>
If using a Perl file as a source, optional switches may be passed which will
be used when invoking the perl executable.
my $parser = TAP::Parser->new( {
source => $test_file,
switches => '-Ilib',
} );
=item * C<test_args>
Used in conjunction with the C<source> option to supply a reference to
an C<@ARGV> style array of arguments to pass to the test program.
=item * C<spool>
If passed a filehandle will write a copy of all parsed TAP to that handle.
=item * C<merge>
If false, STDERR is not captured (though it is 'relayed' to keep it
somewhat synchronized with STDOUT.)
If true, STDERR and STDOUT are the same filehandle. This may cause
breakage if STDERR contains anything resembling TAP format, but does
allow exact synchronization.
Subtleties of this behavior may be platform-dependent and may change in
the future.
=item * C<source_class>
This option was introduced to let you easily customize which I<source> class
the parser should use. It defaults to L<TAP::Parser::Source>.
See also L</make_source>.
=item * C<perl_source_class>
This option was introduced to let you easily customize which I<perl source>
class the parser should use. It defaults to L<TAP::Parser::Source::Perl>.
See also L</make_perl_source>.
=item * C<grammar_class>
This option was introduced to let you easily customize which I<grammar> class
the parser should use. It defaults to L<TAP::Parser::Grammar>.
See also L</make_grammar>.
=item * C<iterator_factory_class>
This option was introduced to let you easily customize which I<iterator>
factory class the parser should use. It defaults to
L<TAP::Parser::IteratorFactory>.
See also L</make_iterator>.
=item * C<result_factory_class>
This option was introduced to let you easily customize which I<result>
factory class the parser should use. It defaults to
L<TAP::Parser::ResultFactory>.
See also L</make_result>.
=back
=cut
# new() implementation supplied by TAP::Base
# This should make overriding behaviour of the Parser in subclasses easier:
# Each of these returns the name of the class used when the corresponding
# *_class constructor argument is not supplied.
sub _default_source_class           { return 'TAP::Parser::Source' }
sub _default_perl_source_class      { return 'TAP::Parser::Source::Perl' }
sub _default_grammar_class          { return 'TAP::Parser::Grammar' }
sub _default_iterator_factory_class { return 'TAP::Parser::IteratorFactory' }
sub _default_result_factory_class   { return 'TAP::Parser::ResultFactory' }
##############################################################################
=head2 Instance Methods
=head3 C<next>
my $parser = TAP::Parser->new( { source => $file } );
while ( my $result = $parser->next ) {
print $result->as_string, "\n";
}
This method returns the results of the parsing, one result at a time. Note
that it is destructive. You can't rewind and examine previous results.
If callbacks are used, they will be issued before this call returns.
Each result returned is a subclass of L<TAP::Parser::Result>. See that
module and related classes for more information on how to use them.
=cut
# Return the next parse result. The iterator closure is built by _iter on
# the first call and cached in the object for later calls.
sub next {
    my ($self) = @_;
    $self->{_iter} ||= $self->_iter;
    return $self->{_iter}->();
}
##############################################################################
=head3 C<run>
$parser->run;
This method merely runs the parser and parses all of the TAP.
=cut
# Drive the parser to completion, discarding the results: next() is called
# until it returns an undefined value.
sub run {
    my ($self) = @_;
    1 while defined( $self->next );
}
##############################################################################
=head3 C<make_source>
Make a new L<TAP::Parser::Source> object and return it. Passes through any
arguments given.
The C<source_class> can be customized, as described in L</new>.
=head3 C<make_perl_source>
Make a new L<TAP::Parser::Source::Perl> object and return it. Passes through
any arguments given.
The C<perl_source_class> can be customized, as described in L</new>.
=head3 C<make_grammar>
Make a new L<TAP::Parser::Grammar> object and return it. Passes through any
arguments given.
The C<grammar_class> can be customized, as described in L</new>.
=head3 C<make_iterator>
Make a new L<TAP::Parser::Iterator> object using the parser's
L<TAP::Parser::IteratorFactory>, and return it. Passes through any arguments
given.
The C<iterator_factory_class> can be customized, as described in L</new>.
=head3 C<make_result>
Make a new L<TAP::Parser::Result> object using the parser's
L<TAP::Parser::ResultFactory>, and return it. Passes through any arguments
given.
The C<result_factory_class> can be customized, as described in L</new>.
=cut
# This should make overriding behaviour of the Parser in subclasses easier:
# Factory methods. Each one forwards its arguments to the class configured
# in the corresponding *_class accessor and returns what that class returns.
sub make_source {
    my $self = shift;
    return $self->source_class->new(@_);
}

sub make_perl_source {
    my $self = shift;
    return $self->perl_source_class->new(@_);
}

sub make_grammar {
    my $self = shift;
    return $self->grammar_class->new(@_);
}

sub make_iterator {
    my $self = shift;
    return $self->iterator_factory_class->make_iterator(@_);
}

sub make_result {
    my $self = shift;
    return $self->result_factory_class->make_result(@_);
}
# Obtain an iterator (stream) for $source.
#
# If the source has a get_stream method then use it. This makes it possible
# to pass a pre-existing source object to the parser's constructor. Any
# other source is handed to the iterator factory class.
sub _iterator_for_source {
    my $self   = shift;
    my $source = shift;

    my $provides_stream
      = UNIVERSAL::can( $source, 'can' ) && $source->can('get_stream');

    return $source->get_stream($self) if $provides_stream;

    return $self->iterator_factory_class->make_iterator($source);
}
{
# Of the following, anything beginning with an underscore is strictly
# internal and should not be exposed.
#
# %initialize holds the starting value of each piece of parser state.
# _initialize copies every entry onto a new parser, creating a fresh array
# for each entry whose value here is an array reference.
my %initialize = (
version => $DEFAULT_TAP_VERSION,
plan => '', # the test plan (e.g., 1..3)
tap => '', # the TAP
tests_run => 0, # number of test lines seen so far
results => [], # TAP parser results
skipped => [], # numbers of the tests with a SKIP directive
todo => [], # numbers of the tests with a TODO directive
passed => [], # numbers of the tests for which is_ok was true
failed => [], # numbers of the tests for which is_ok was false
actual_failed => [], # numbers of the tests which really failed
actual_passed => [], # numbers of the tests which really passed
todo_passed => [], # tests which unexpectedly succeed
parse_errors => [], # perfect TAP should have none
);
# Event names for which a callback may be installed; _initialize passes this
# list to SUPER::_initialize.
# We seem to have this list hanging around all over the place. We could
# probably get it from somewhere else to avoid the repetition.
my @legal_callback = qw(
test
version
plan
comment
bailout
unknown
yaml
ALL
ELSE
EOF
);
# Constructor arguments that name replacement classes. _initialize treats
# each name as an accessor on the parser and falls back to the matching
# "_default_<name>" method when the argument is not supplied.
my @class_overrides = qw(
source_class
perl_source_class
grammar_class
iterator_factory_class
result_factory_class
);
# Set up a new parser from the constructor's argument hash.
#
# Exactly one of 'tap', 'exec', 'source' or 'stream' says where the TAP
# comes from; whichever is given is turned into a stream. Croaks when more
# than one of them is supplied, when an unknown option is present, or when
# no stream could be determined. Returns the parser.
sub _initialize {
    my ( $self, $arg_for ) = @_;

    # Everything here is basically designed to convert any TAP source to a
    # stream.

    # Work on a shallow copy: recognised options are deleted as they are
    # consumed, and whatever is left over is reported as unknown.
    my %args = %{ $arg_for || {} };
    $self->SUPER::_initialize( \%args, \@legal_callback );

    # Class overrides come out first so that the make_* calls below use them.
    for my $override (@class_overrides) {
        my $default = "_default_$override";
        my $class   = delete $args{$override} || $self->$default();
        $self->$override($class);
    }

    my ($stream, $tap,   $source,   $exec,
        $merge,  $spool, $switches, $ignore_exit
      )
      = delete @args{
        qw( stream tap source exec merge spool switches ignore_exit )
      };
    my @test_args = @{ delete $args{test_args} || [] };

    my $inputs_given = grep { defined } $stream, $tap, $source, $exec;
    if ( $inputs_given > 1 ) {
        $self->_croak(
            "You may only choose one of 'exec', 'stream', 'tap' or 'source'"
        );
    }

    my @excess = sort keys %args;
    $self->_croak("Unknown options: @excess") if @excess;

    if ($tap) {

        # Raw TAP text: iterate over its lines.
        $stream = $self->_iterator_for_source( [ split /\n/, $tap ] );
    }
    elsif ($exec) {

        # A command to run, with any test arguments appended.
        my $command = $self->make_source;
        $command->source( [ @$exec, @test_args ] );
        $command->merge($merge);    # XXX should just be arguments?
        $stream = $command->get_stream($self);
    }
    elsif ($source) {
        if ( $source =~ /\n/ ) {

            # A string containing a newline is treated as TAP text.
            $stream
              = $self->_iterator_for_source( [ split /\n/, $source ] );
        }
        elsif ( ref $source ) {
            $stream = $self->_iterator_for_source($source);
        }
        elsif ( -e $source ) {

            # An existing file is handed to the Perl source class.
            my $script = $self->make_perl_source;
            $script->switches($switches) if $switches;
            $script->merge($merge);    # XXX args to new()?
            $script->source( [ $source, @test_args ] );
            $stream = $script->get_stream($self);
        }
        else {
            $self->_croak("Cannot determine source for $source");
        }
    }

    $self->_croak('PANIC: could not determine stream') unless $stream;

    # Give every parser its own copy of the initial state; array defaults
    # must not be shared between parsers.
    for my $field ( keys %initialize ) {
        my $default = $initialize{$field};
        $self->{$field} = ref($default) eq 'ARRAY' ? [] : $default;
    }

    $self->_stream($stream);
    $self->_spool($spool);
    $self->ignore_exit($ignore_exit);

    return $self;
}
}
=head1 INDIVIDUAL RESULTS
If you've read this far in the docs, you've seen this:
while ( my $result = $parser->next ) {
print $result->as_string;
}
Each result returned is a L<TAP::Parser::Result> subclass, referred to as
I<result types>.
=head2 Result types
Basically, you fetch individual results from the TAP. The seven types, with
examples of each, are as follows:
=over 4
=item * Version
TAP version 12
=item * Plan
1..42
=item * Pragma
pragma +strict
=item * Test
ok 3 - We should start with some foobar!
=item * Comment
# Hope we don't use up the foobar.
=item * Bailout
Bail out! We ran out of foobar!
=item * Unknown
... yo, this ain't TAP! ...
=back
Each result fetched is a result object of a different type. There are common
methods to each result object and different types may have methods unique to
their type. Sometimes a type method may be overridden in a subclass, but its
use is guaranteed to be identical.
=head2 Common type methods
=head3 C<type>
Returns the type of result, such as C<comment> or C<test>.
=head3 C<as_string>
Returns a string representation of the token. This might not be the exact
output, however. Tests will have test numbers added if not present, TODO and
SKIP directives will be capitalized and, in general, things will be cleaned
up. If you need the original text for the token, see the C<raw> method.
=head3 C<raw>
Returns the original line of text which was parsed.
=head3 C<is_plan>
Indicates whether or not this is the test plan line.
=head3 C<is_test>
Indicates whether or not this is a test line.
=head3 C<is_comment>
Indicates whether or not this is a comment. Comments will generally only
appear in the TAP stream if STDERR is merged to STDOUT. See the
C<merge> option.
=head3 C<is_bailout>
Indicates whether or not this is a bailout line.
=head3 C<is_yaml>
Indicates whether or not the current item is a YAML block.
=head3 C<is_unknown>
Indicates whether or not the current line could be parsed.
=head3 C<is_ok>
if ( $result->is_ok ) { ... }
Reports whether or not a given result has passed. Anything which is B<not> a
test result returns true. This is merely provided as a convenient shortcut
which allows you to do this:
my $parser = TAP::Parser->new( { source => $source } );
while ( my $result = $parser->next ) {
# only print failing results
print $result->as_string unless $result->is_ok;
}
=head2 C<plan> methods
if ( $result->is_plan ) { ... }
If the above evaluates as true, the following methods will be available on the
C<$result> object.
=head3 C<plan>
if ( $result->is_plan ) {
print $result->plan;
}
This is merely a synonym for C<as_string>.
=head3 C<directive>
my $directive = $result->directive;
If a SKIP directive is included with the plan, this method will return it.
1..0 # SKIP: why bother?
=head3 C<explanation>
my $explanation = $result->explanation;
If a SKIP directive was included with the plan, this method will return the
explanation, if any.
=head2 C<pragma> methods
if ( $result->is_pragma ) { ... }
If the above evaluates as true, the following methods will be available on the
C<$result> object.
=head3 C<pragmas>
Returns a list of pragmas each of which is a + or - followed by the
pragma name.

=head2 C<comment> methods
if ( $result->is_comment ) { ... }
If the above evaluates as true, the following methods will be available on the
C<$result> object.
=head3 C<comment>
if ( $result->is_comment ) {
my $comment = $result->comment;
print "I have something to say: $comment";
}
=head2 C<bailout> methods
if ( $result->is_bailout ) { ... }
If the above evaluates as true, the following methods will be available on the
C<$result> object.
=head3 C<explanation>
if ( $result->is_bailout ) {
my $explanation = $result->explanation;
print "We bailed out because ($explanation)";
}
If, and only if, a token is a bailout token, you can get an "explanation" via
this method. The explanation is the text after the mystical "Bail out!" words
which appear in the tap output.
=head2 C<unknown> methods
if ( $result->is_unknown ) { ... }
There are no unique methods for unknown results.
=head2 C<test> methods
if ( $result->is_test ) { ... }
If the above evaluates as true, the following methods will be available on the
C<$result> object.
=head3 C<ok>
my $ok = $result->ok;
Returns the literal text of the C<ok> or C<not ok> status.
=head3 C<number>
my $test_number = $result->number;
Returns the number of the test, even if the original TAP output did not supply
that number.
=head3 C<description>
my $description = $result->description;
Returns the description of the test, if any. This is the portion after the
test number but before the directive.
=head3 C<directive>
my $directive = $result->directive;
Returns either C<TODO> or C<SKIP> if either directive was present for a test
line.
=head3 C<explanation>
my $explanation = $result->explanation;
If a test had either a C<TODO> or C<SKIP> directive, this method will return
the accompanying explanation, if present.
not ok 17 - 'Pigs can fly' # TODO not enough acid
For the above line, the explanation is I<not enough acid>.
=head3 C<is_ok>
if ( $result->is_ok ) { ... }
Returns a boolean value indicating whether or not the test passed. Remember
that for TODO tests, the test always passes.
B<Note:> this was formerly C<passed>. The latter method is deprecated and
will issue a warning.
=head3 C<is_actual_ok>
if ( $result->is_actual_ok ) { ... }
Returns a boolean value indicating whether or not the test passed, regardless
of its TODO status.
B<Note:> this was formerly C<actual_passed>. The latter method is deprecated
and will issue a warning.
=head3 C<is_unplanned>
if ( $test->is_unplanned ) { ... }
If a test number is greater than the number of planned tests, this method will
return true. Unplanned tests will I<always> return false for C<is_ok>,
regardless of whether or not the test C<has_todo> (see
L<TAP::Parser::Result::Test> for more information about this).
=head3 C<has_skip>
if ( $result->has_skip ) { ... }
Returns a boolean value indicating whether or not this test had a SKIP
directive.
=head3 C<has_todo>
if ( $result->has_todo ) { ... }
Returns a boolean value indicating whether or not this test had a TODO
directive.
Note that TODO tests I<always> pass. If you need to know whether or not
they really passed, check the C<is_actual_ok> method.
=head3 C<in_todo>
if ( $parser->in_todo ) { ... }
True while the most recent result was a TODO. Becomes true before the
TODO result is returned and stays true until just before the next non-
TODO test is returned.
=head1 TOTAL RESULTS
After parsing the TAP, there are many methods available to let you dig through
the results and determine what is meaningful to you.
=head2 Individual Results
These results refer to individual tests which are run.
=head3 C<passed>
my @passed = $parser->passed; # the test numbers which passed
my $passed = $parser->passed; # the number of tests which passed
This method lets you know which (or how many) tests passed. If a test failed
but had a TODO directive, it will be counted as a passed test.
=cut
# List context: the numbers of the passed tests. Scalar context: how many.
sub passed {
    my $self = shift;
    return @{ $self->{passed} };
}
=head3 C<failed>
my @failed = $parser->failed; # the test numbers which failed
my $failed = $parser->failed; # the number of tests which failed
This method lets you know which (or how many) tests failed. If a test passed
but had a TODO directive, it will B<NOT> be counted as a failed test.
=cut
# List context: the numbers of the failed tests. Scalar context: how many.
sub failed {
    my $self = shift;
    return @{ $self->{failed} };
}
=head3 C<actual_passed>
# the test numbers which actually passed
my @actual_passed = $parser->actual_passed;
# the number of tests which actually passed
my $actual_passed = $parser->actual_passed;
This method lets you know which (or how many) tests actually passed,
regardless of whether or not a TODO directive was found.
=cut
# List context: the numbers of the tests which actually passed. Scalar
# context: how many.
sub actual_passed {
    my $self = shift;
    return @{ $self->{actual_passed} };
}

# actual_ok is the same subroutine under a second name.
*actual_ok = \&actual_passed;
=head3 C<actual_ok>
This method is a synonym for C<actual_passed>.
=head3 C<actual_failed>
# the test numbers which actually failed
my @actual_failed = $parser->actual_failed;
# the number of tests which actually failed
my $actual_failed = $parser->actual_failed;
This method lets you know which (or how many) tests actually failed,
regardless of whether or not a TODO directive was found.
=cut
# List context: the numbers of the tests which actually failed. Scalar
# context: how many.
sub actual_failed {
    my $self = shift;
    return @{ $self->{actual_failed} };
}
##############################################################################
=head3 C<todo>
my @todo = $parser->todo; # the test numbers with todo directives
my $todo = $parser->todo; # the number of tests with todo directives
This method lets you know which (or how many) tests had TODO directives.
=cut
# List context: the numbers of the tests with a TODO directive. Scalar
# context: how many.
sub todo {
    my $self = shift;
    return @{ $self->{todo} };
}
=head3 C<todo_passed>
# the test numbers which unexpectedly succeeded
my @todo_passed = $parser->todo_passed;
# the number of tests which unexpectedly succeeded
my $todo_passed = $parser->todo_passed;
This method lets you know which (or how many) tests actually passed but were
declared as "TODO" tests.
=cut
# List context: the numbers of the TODO tests which unexpectedly succeeded.
# Scalar context: how many.
sub todo_passed {
    my $self = shift;
    return @{ $self->{todo_passed} };
}
##############################################################################
=head3 C<todo_failed>
# deprecated in favor of 'todo_passed'. This method was horribly misnamed.
This was a badly misnamed method. It indicates which TODO tests unexpectedly
succeeded. Will now issue a warning and call C<todo_passed>.
=cut
# Deprecated name for todo_passed: warn, then hand over to the real method.
sub todo_failed {
    my $notice
      = '"todo_failed" is deprecated. Please use "todo_passed". See the docs.';
    warn $notice;

    # goto &sub passes the current @_ straight through to todo_passed.
    goto &todo_passed;
}
=head3 C<skipped>
my @skipped = $parser->skipped; # the test numbers with SKIP directives
my $skipped = $parser->skipped; # the number of tests with SKIP directives
This method lets you know which (or how many) tests had SKIP directives.
=cut
# List context: the numbers of the tests with a SKIP directive. Scalar
# context: how many.
sub skipped {
    my $self = shift;
    return @{ $self->{skipped} };
}
=head2 Pragmas
=head3 C<pragma>
Get or set a pragma. To get the state of a pragma:
if ( $p->pragma('strict') ) {
# be strict
}
To set the state of a pragma:
$p->pragma('strict', 1); # enable strict mode
=cut
# Get or set the state of a named pragma.
#
# With only a name, returns the stored state (undefined when the pragma is
# not enabled). With a second argument, a true value enables the pragma and
# a false value removes it; the setter form returns nothing.
sub pragma {
    my ( $self, $pragma, @state ) = @_;

    return $self->{pragma}->{$pragma} unless @state;

    if ( $state[0] ) {
        $self->{pragma}->{$pragma} = 1;
    }
    else {

        # Disabled pragmas are deleted rather than stored as false, so that
        # only enabled ones remain as keys.
        delete $self->{pragma}->{$pragma};
    }

    return;
}
=head3 C<pragmas>
Get a list of all the currently enabled pragmas:
my @pragmas_enabled = $p->pragmas;
=cut
# Return the names of all currently enabled pragmas, sorted.
sub pragmas {
    my $self    = shift;
    my $enabled = $self->{pragma} || {};
    return sort keys %{$enabled};
}
=head2 Summary Results
These results are "meta" information about the total results of an individual
test program.
=head3 C<plan>
my $plan = $parser->plan;
Returns the test plan, if found.
=head3 C<good_plan>
Deprecated. Use C<is_good_plan> instead.
=cut
# Deprecated name for is_good_plan: warn, then hand over to the real method.
sub good_plan {
    my $notice = 'good_plan() is deprecated. Please use "is_good_plan()"';
    warn $notice;

    # goto &sub passes the current @_ straight through to is_good_plan.
    goto &is_good_plan;
}
##############################################################################
=head3 C<is_good_plan>
if ( $parser->is_good_plan ) { ... }
Returns a boolean value indicating whether or not the number of tests planned
matches the number of tests run.
B<Note:> this was formerly C<good_plan>. The latter method is deprecated and
will issue a warning.
And since we're on that subject ...
=head3 C<tests_planned>
print $parser->tests_planned;
Returns the number of tests planned, according to the plan. For example, a
plan of '1..17' will mean that 17 tests were planned.
=head3 C<tests_run>
print $parser->tests_run;
Returns the number of tests which actually were run. Hopefully this will
match the number of C<< $parser->tests_planned >>.
=head3 C<skip_all>
Returns a true value (actually the reason for skipping) if all tests
were skipped.
=head3 C<start_time>
Returns the time when the Parser was created.
=head3 C<end_time>
Returns the time when the end of TAP input was seen.
=head3 C<has_problems>
if ( $parser->has_problems ) {
...
}
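This is a 'catch-all' method which returns true if any tests have currently
failed, any parse errors occurred, or (unless C<ignore_exit> is set) the
test program finished with a non-zero wait or exit status. Note that TODO
tests which unexpectedly succeeded are I<not> counted as problems.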
=cut
# True when anything went wrong: a failed test, a parse error or - unless
# the exit status is being ignored - a non-zero wait or exit status.
sub has_problems {
    my $self = shift;

    # Both counts are taken in scalar context, i.e. "how many".
    my $failures = $self->failed;
    return $failures if $failures;

    my $errors = $self->parse_errors;
    return $errors if $errors;

    return !$self->ignore_exit && ( $self->wait || $self->exit );
}
=head3 C<version>
$parser->version;
Once the parser is done, this will return the version number for the
parsed TAP. Version numbers were introduced with TAP version 13 so if no
version number is found version 12 is assumed.
=head3 C<exit>
$parser->exit;
Once the parser is done, this will return the exit status. If the parser ran
an executable, it returns the exit status of the executable.
=head3 C<wait>
$parser->wait;
Once the parser is done, this will return the wait status. If the parser ran
an executable, it returns the wait status of the executable. Otherwise, this
merely returns the C<exit> status.
=head3 C<ignore_exit>
$parser->ignore_exit(1);
Tell the parser to ignore the exit status from the test when determining
whether the test passed. Normally tests with non-zero exit status are
considered to have failed even if all individual tests passed. In cases
where it is not possible to control the exit value of the test script
use this option to ignore it.
=cut
# The ignore_exit flag is kept as a pragma: any argument is forwarded to
# pragma(), and with no argument the current state is returned.
sub ignore_exit {
    my ( $self, @state ) = @_;
    return $self->pragma( 'ignore_exit', @state );
}
=head3 C<parse_errors>
my @errors = $parser->parse_errors; # the parser errors
my $errors = $parser->parse_errors; # the number of parser_errors
Fortunately, all TAP output is perfect. In the event that it is not, this
method will return parser errors. Note that a junk line which the parser does
not recognize is I<not> an error. This allows this parser to handle future
versions of TAP. The following are all TAP errors reported by the parser:
=over 4
=item * Misplaced plan
The plan (for example, '1..5'), must only come at the beginning or end of the
TAP output.
=item * No plan
Gotta have a plan!
=item * More than one plan
1..3
ok 1 - input file opened
not ok 2 - first line of the input valid # todo some data
ok 3 read the rest of the file
1..3
Right. Very funny. Don't do that.
=item * Test numbers out of sequence
1..3
ok 1 - input file opened
not ok 2 - first line of the input valid # todo some data
ok 2 read the rest of the file
That last test line above should have the number '3' instead of '2'.
Note that it's perfectly acceptable for some lines to have test numbers and
others to not have them. However, when a test number is found, it must be in
sequence. The following is also an error:
1..3
ok 1 - input file opened
not ok - first line of the input valid # todo some data
ok 2 read the rest of the file
But this is not:
1..3
ok - input file opened
not ok - first line of the input valid # todo some data
ok 3 read the rest of the file
=back
=cut
# List context: the parse errors recorded so far. Scalar context: how many.
sub parse_errors {
    my $self = shift;
    return @{ $self->{parse_errors} };
}
# Append one message to the list of parse errors; returns the parser so
# that calls can be chained.
sub _add_error {
    my $self = shift;
    my ($error) = @_;

    push @{ $self->{parse_errors} }, $error;

    return $self;
}
# Build the state table that drives the parser.
#
# Returns a reference to a hash keyed by state name (INIT, PLAN, PLANNED and
# so on). Each state maps a token type to a transition, which may contain:
#
#   act       A coderef to run; it is called with the token as its argument
#   goto      The new state to move to. Stay in this state if missing
#   continue  Goto the new state and run the new state for the current token
#
# The action closures capture $self, so a table belongs to a single parser.
sub _make_state_table {
    my $self = shift;
    my %states;

    # Test numbers listed as TODO on the plan line; an entry is consumed
    # when the test with that number is seen.
    my %planned_todo = ();

    # These transitions are defaults for all states
    my %state_globals = (
        comment => {},
        bailout => {},
        yaml    => {},
        version => {
            act => sub {

                # The message must be a single line of text.
                $self->_add_error(
                        'If TAP version is present it must be the first '
                      . 'line of output' );
            },
        },
        unknown => {
            act => sub {
                my $unk = shift;

                # Unknown lines are only an error in strict mode.
                if ( $self->pragma('strict') ) {
                    $self->_add_error(
                        'Unknown TAP token: "' . $unk->raw . '"' );
                }
            },
        },
        pragma => {
            act => sub {
                my ($pragma) = @_;
                for my $pr ( $pragma->pragmas ) {

                    # "+name" switches a pragma on, "-name" switches it off.
                    if ( $pr =~ /^ ([-+])(\w+) $/x ) {
                        $self->pragma( $2, $1 eq '+' );
                    }
                }
            },
        },
    );

    # Provides default elements for transitions
    my %state_defaults = (
        plan => {
            act => sub {
                my ($plan) = @_;
                $self->tests_planned( $plan->tests_planned );
                $self->plan( $plan->plan );
                if ( $plan->has_skip ) {
                    $self->skip_all( $plan->explanation
                          || '(no reason given)' );
                }
                $planned_todo{$_}++ for @{ $plan->todo_list };
            },
        },
        test => {
            act => sub {
                my ($test) = @_;
                my ( $number, $tests_run )
                  = ( $test->number, ++$self->{tests_run} );

                # Fake TODO state
                if ( defined $number && delete $planned_todo{$number} ) {
                    $test->set_directive('TODO');
                }

                my $has_todo = $test->has_todo;
                $self->in_todo($has_todo);

                # Anything beyond the planned count is unplanned.
                if ( defined( my $tests_planned = $self->tests_planned ) ) {
                    if ( $tests_run > $tests_planned ) {
                        $test->is_unplanned(1);
                    }
                }

                if ( defined $number ) {
                    if ( $number != $tests_run ) {
                        my $count = $tests_run;
                        $self->_add_error( "Tests out of sequence.  Found "
                              . "($number) but expected ($count)" );
                    }
                }
                else {

                    # Unnumbered tests take the running count as number.
                    $test->_number( $number = $tests_run );
                }

                push @{ $self->{todo} } => $number if $has_todo;
                push @{ $self->{todo_passed} } => $number
                  if $test->todo_passed;
                push @{ $self->{skipped} } => $number
                  if $test->has_skip;

                push @{ $self->{ $test->is_ok ? 'passed' : 'failed' } } =>
                  $number;
                push @{
                    $self->{
                        $test->is_actual_ok
                        ? 'actual_passed'
                        : 'actual_failed'
                      }
                  } => $number;
            },
        },
        yaml => { act => sub { }, },
    );

    %states = (
        INIT => {
            version => {
                act => sub {
                    my ($version) = @_;
                    my $ver_num = $version->version;

                    # An explicit version must be newer than the default.
                    if ( $ver_num <= $DEFAULT_TAP_VERSION ) {
                        my $ver_min = $DEFAULT_TAP_VERSION + 1;
                        $self->_add_error(
                                "Explicit TAP version must be at least "
                              . "$ver_min. Got version $ver_num" );
                        $ver_num = $DEFAULT_TAP_VERSION;
                    }

                    # ...and no newer than the latest one we know about.
                    if ( $ver_num > $MAX_TAP_VERSION ) {
                        $self->_add_error(
                                "TAP specified version $ver_num but "
                              . "we don't know about versions later "
                              . "than $MAX_TAP_VERSION" );
                        $ver_num = $MAX_TAP_VERSION;
                    }
                    $self->version($ver_num);
                    $self->_grammar->set_version($ver_num);
                },
                goto => 'PLAN'
            },
            plan => { goto => 'PLANNED' },
            test => { goto => 'UNPLANNED' },
        },
        PLAN => {
            plan => { goto => 'PLANNED' },
            test => { goto => 'UNPLANNED' },
        },
        PLANNED => {
            test => { goto => 'PLANNED_AFTER_TEST' },
            plan => {
                act => sub {
                    $self->_add_error(
                        'More than one plan found in TAP output');
                },
            },
        },
        PLANNED_AFTER_TEST => {
            test => { goto => 'PLANNED_AFTER_TEST' },
            plan => { act  => sub { }, continue => 'PLANNED' },
            yaml => { goto => 'PLANNED' },
        },
        GOT_PLAN => {
            test => {
                act => sub {
                    my $line = $self->plan;
                    $self->_add_error(
                            "Plan ($line) must be at the beginning "
                          . "or end of the TAP output" );
                    $self->is_good_plan(0);
                },
                continue => 'PLANNED'
            },
            plan => { continue => 'PLANNED' },
        },
        UNPLANNED => {
            test => { goto => 'UNPLANNED_AFTER_TEST' },
            plan => { goto => 'GOT_PLAN' },
        },
        UNPLANNED_AFTER_TEST => {
            test => { act  => sub { }, continue => 'UNPLANNED' },
            plan => { act  => sub { }, continue => 'UNPLANNED' },
            yaml => { goto => 'PLANNED' },
        },
    );

    # Apply globals and defaults to state table
    for my $name ( keys %states ) {

        # Merge with globals
        my $st = { %state_globals, %{ $states{$name} } };

        # Add defaults
        for my $next ( sort keys %{$st} ) {
            if ( my $default = $state_defaults{$next} ) {
                for my $def ( sort keys %{$default} ) {
                    $st->{$next}->{$def} ||= $default->{$def};
                }
            }
        }

        # Stuff back in table
        $states{$name} = $st;
    }

    return \%states;
}
=head3 C<get_select_handles>
Get a list of file handles which can be passed to C<select> to
determine the readiness of this parser.
=cut
# Delegates to the parser's stream, which knows its own file handles.
sub get_select_handles {
    my $self = shift;
    return $self->_stream->get_select_handles;
}
# Get or set the parser's grammar object.
#
# With an argument, stores and returns it (passing undef drops the grammar).
# Without one, returns the stored grammar, building it with make_grammar()
# on first use.
sub _grammar {
    my ( $self, @replacement ) = @_;

    if (@replacement) {
        return $self->{_grammar} = $replacement[0];
    }

    unless ( $self->{_grammar} ) {
        $self->{_grammar} = $self->make_grammar(
            {   stream  => $self->_stream,
                parser  => $self,
                version => $self->version
            }
        );
    }

    return $self->{_grammar};
}
# Build and return the closure that produces the parser's results.
#
# Each call of the returned closure asks the grammar for the next token,
# runs it through the state table from _make_state_table and returns it.
# When the grammar yields nothing more, the closure records the stream's
# exit and wait status, calls _finish and returns an undefined value.
sub _iter {
my $self = shift;
my $stream = $self->_stream;
my $grammar = $self->_grammar;
my $spool = $self->_spool;
# Every parse starts in the INIT state; $state is shared by the closures
# below and updated as tokens are processed.
my $state = 'INIT';
my $state_table = $self->_make_state_table;
$self->start_time( $self->get_time );
# Make next_state closure: applies the transition for one token and
# returns that same token.
my $next_state = sub {
my $token = shift;
my $type = $token->type;
TRANS: {
my $state_spec = $state_table->{$state}
or die "Illegal state: $state";
if ( my $next = $state_spec->{$type} ) {
if ( my $act = $next->{act} ) {
$act->($token);
}
# 'continue' switches state and re-runs the block, so the same token is
# processed again by the new state; 'goto' only switches state.
if ( my $cont = $next->{continue} ) {
$state = $cont;
redo TRANS;
}
elsif ( my $goto = $next->{goto} ) {
$state = $goto;
}
}
else {
confess("Unhandled token type: $type\n");
}
}
return $token;
};
# Handle end of stream: copy the stream's exit and wait status to the
# parser and finish the parse. The bare return leaves the caller's $result
# undefined.
my $end_handler = sub {
$self->exit( $stream->exit );
$self->wait( $stream->wait );
$self->_finish;
return;
};
# Finally make the closure that we return. For performance reasons
# there are two versions of the returned function: one that handles
# callbacks and one that does not.
if ( $self->_has_callbacks ) {
return sub {
# An exception from the tokenizer is recorded as a parse error; $result
# is then undefined, so the end-of-stream branch below runs.
my $result = eval { $grammar->tokenize };
$self->_add_error($@) if $@;
if ( defined $result ) {
$result = $next_state->($result);
# Callbacks registered for this result type run first; if there are none
# the ELSE callbacks run instead. ALL callbacks always run afterwards.
if ( my $code = $self->_callback_for( $result->type ) ) {
$_->($result) for @{$code};
}
else {
$self->_make_callback( 'ELSE', $result );
}
$self->_make_callback( 'ALL', $result );
# Echo TAP to spool file
print {$spool} $result->raw, "\n" if $spool;
}
else {
$result = $end_handler->();
# There is no result object at end of input, so EOF callbacks are given
# the parser itself.
$self->_make_callback( 'EOF', $self )
unless defined $result;
}
return $result;
};
} # _has_callbacks
else {
return sub {
# Same as above, minus the callback dispatch.
my $result = eval { $grammar->tokenize };
$self->_add_error($@) if $@;
if ( defined $result ) {
$result = $next_state->($result);
# Echo TAP to spool file
print {$spool} $result->raw, "\n" if $spool;
}
else {
$result = $end_handler->();
}
return $result;
};
} # no callbacks
}
# Called once the end of the TAP input has been reached: records the end
# time, releases the stream and grammar, and runs the final sanity checks
# on the plan and the test counts. Returns the parser.
sub _finish {
    my $self = shift;

    $self->end_time( $self->get_time );

    # Avoid leaks
    $self->_stream(undef);
    $self->_grammar(undef);

    # If we just deleted the iterator we would not get a fault if it were
    # recreated, so it is replaced by a sub that returns an endless stream
    # of undef instead. Doing that segfaults on 5.5.4, presumably because
    # the closure being replaced is still executing and has not been
    # protected with a refcount - hence the version check.
    if ( $] >= 5.006 ) {
        $self->{_iter} = sub {return};
    }

    # Sanity checks
    if ( $self->plan ) {
        $self->is_good_plan(1) unless defined $self->is_good_plan;
    }
    else {
        $self->_add_error('No plan found in TAP output');
    }

    if ( $self->tests_run != ( $self->tests_planned || 0 ) ) {
        $self->is_good_plan(0);

        # Only complain about the count when there was a plan at all.
        my $planned = $self->tests_planned;
        if ( defined $planned ) {
            my $ran = $self->tests_run;
            $self->_add_error(
                "Bad plan.  You planned $planned tests but ran $ran.");
        }
    }

    # Every test that ran must have been counted as passed or failed.
    my $accounted_for = $self->passed + $self->failed;
    if ( $self->tests_run != $accounted_for ) {

        # this should never happen
        my $run    = $self->tests_run;
        my $ok     = $self->passed;
        my $not_ok = $self->failed;
        $self->_croak( "Panic: planned test count ($run) did not equal "
              . "sum of passed ($ok) and failed ($not_ok) tests!" );
    }

    $self->is_good_plan(0) unless defined $self->is_good_plan;

    return $self;
}
=head3 C<delete_spool>
Delete and return the spool.
my $fh = $parser->delete_spool;
=cut
# Remove the spool handle from the parser and hand it back to the caller.
sub delete_spool {
    my ($self) = @_;
    my $spool = delete $self->{_spool};
    return $spool;
}
##############################################################################
=head1 CALLBACKS
As mentioned earlier, a C<callbacks> key may be added to the
C<TAP::Parser> constructor. If present, each callback corresponding to a
given result type will be called with the result as the argument if the
C<run> method is used. The callback is expected to be a subroutine
reference (or anonymous subroutine) which is invoked with the parser
result as its argument.
my %callbacks = (
test => \&test_callback,
plan => \&plan_callback,
comment => \&comment_callback,
bailout => \&bailout_callback,
unknown => \&unknown_callback,
);
my $aggregator = TAP::Parser::Aggregator->new;
foreach my $file ( @test_files ) {
my $parser = TAP::Parser->new(
{
source => $file,
callbacks => \%callbacks,
}
);
$parser->run;
$aggregator->add( $file, $parser );
}
Callbacks may also be added like this:
$parser->callback( test => \&test_callback );
$parser->callback( plan => \&plan_callback );
The following keys are allowed for callbacks. These keys are case-sensitive.
=over 4
=item * C<test>
Invoked if C<< $result->is_test >> returns true.
=item * C<version>
Invoked if C<< $result->is_version >> returns true.
=item * C<plan>
Invoked if C<< $result->is_plan >> returns true.
=item * C<comment>
Invoked if C<< $result->is_comment >> returns true.
=item * C<bailout>
Invoked if C<< $result->is_bailout >> returns true.
=item * C<yaml>
Invoked if C<< $result->is_yaml >> returns true.
=item * C<unknown>
Invoked if C<< $result->is_unknown >> returns true.
=item * C<ELSE>
If a result does not have a callback defined for it, this callback will
be invoked. Thus, if all of the previous result types are specified as
callbacks, this callback will I<never> be invoked.
=item * C<ALL>
This callback will always be invoked and this will happen for each
result after one of the above callbacks is invoked. For example, if
L<Term::ANSIColor> is loaded, you could use the following to color your
test output:
my %callbacks = (
test => sub {
my $test = shift;
if ( $test->is_ok && not $test->directive ) {
# normal passing test
print color 'green';
}
elsif ( !$test->is_ok ) { # even if it's TODO
print color 'white on_red';
}
elsif ( $test->has_skip ) {
print color 'white on_blue';
}
elsif ( $test->has_todo ) {
print color 'white';
}
},
ELSE => sub {
# plan, comment, and so on (anything which isn't a test line)
print color 'black on_white';
},
ALL => sub {
# now print them
print shift->as_string;
print color 'reset';
print "\n";
},
);
=item * C<EOF>
Invoked when there are no more lines to be parsed. Since there is no
accompanying L<TAP::Parser::Result> object the C<TAP::Parser> object is
passed instead.
=back
=head1 TAP GRAMMAR
If you're looking for an EBNF grammar, see L<TAP::Parser::Grammar>.
=head1 BACKWARDS COMPATIBILITY
The Perl-QA list attempted to ensure backwards compatibility with
L<Test::Harness>. However, there are some minor differences.
=head2 Differences
=over 4
=item * TODO plans
A little-known feature of L<Test::Harness> is that it supported TODO
lists in the plan:
1..2 todo 2
ok 1 - We have liftoff
not ok 2 - Anti-gravity device activated
Under L<Test::Harness>, test number 2 would I<pass> because it was
listed as a TODO test on the plan line. However, we are not aware of
anyone actually using this feature and hard-coding test numbers is
discouraged because it's very easy to add a test and break the test
number sequence. This makes test suites very fragile. Instead, the
following should be used:
1..2
ok 1 - We have liftoff
not ok 2 - Anti-gravity device activated # TODO
=item * 'Missing' tests
It rarely happens, but sometimes a harness might encounter
'missing' tests:
ok 1
ok 2
ok 15
ok 16
ok 17
L<Test::Harness> would report tests 3-14 as having failed. For the
C<TAP::Parser>, these tests are not considered failed because they've
never run. They're reported as parse failures (tests out of sequence).
=back
=head1 SUBCLASSING
If you find you need to provide custom functionality (as you would have using
L<Test::Harness::Straps>), you're in luck: C<TAP::Parser> and friends are
designed to be easily subclassed.
Before you start, it's important to know a few things:
=over 2
=item 1
All C<TAP::*> objects inherit from L<TAP::Object>.
=item 2
Most C<TAP::*> classes have a I<SUBCLASSING> section to guide you.
=item 3
Note that C<TAP::Parser> is designed to be the central 'maker' - ie: it is
responsible for creating new objects in the C<TAP::Parser::*> namespace.
This makes it possible for you to have a single point of configuring what
subclasses should be used, which in turn means that in many cases you'll find
you only need to sub-class one of the parser's components.
=item 4
By subclassing, you may end up overriding undocumented methods. That's not
a bad thing per se, but be forewarned that undocumented methods may change
without warning from one release to the next - we cannot guarantee backwards
compatibility. If any I<documented> method needs changing, it will be
deprecated first, and changed in a later release.
=back
=head2 Parser Components
=head3 Sources
A TAP parser consumes input from a I<source>. There are currently two types
of sources: L<TAP::Parser::Source> for general non-perl commands, and
L<TAP::Parser::Source::Perl>. You can subclass both of them. You'll need to
customize your parser by setting the C<source_class> & C<perl_source_class>
parameters. See L</new> for more details.
If you need to customize the objects on creation, subclass L<TAP::Parser> and
override L</make_source> or L</make_perl_source>.
=head3 Iterators
A TAP parser uses I<iterators> to loop through the I<stream> provided by the
parser's I<source>. There are quite a few types of Iterators available.
Choosing which class to use is the responsibility of the I<iterator factory>.
To create your own iterators you'll have to subclass
L<TAP::Parser::IteratorFactory> and L<TAP::Parser::Iterator>. Then you'll
need to customize the class used by your parser by setting the
C<iterator_factory_class> parameter. See L</new> for more details.
If you need to customize the objects on creation, subclass L<TAP::Parser> and
override L</make_iterator>.
=head3 Results
A TAP parser creates L<TAP::Parser::Result>s as it iterates through the
input I<stream>. There are quite a few result types available; choosing
which class to use is the responsibility of the I<result factory>.
To create your own result types you have two options:
=over 2
=item option 1
Subclass L<TAP::Parser::Result> and register your new result type/class with
the default L<TAP::Parser::ResultFactory>.
=item option 2
Subclass L<TAP::Parser::ResultFactory> itself and implement your own
L<TAP::Parser::Result> creation logic. Then you'll need to customize the
class used by your parser by setting the C<result_factory_class> parameter.
See L</new> for more details.
=back
If you need to customize the objects on creation, subclass L<TAP::Parser> and
override L</make_result>.
=head3 Grammar
L<TAP::Parser::Grammar> is the heart of the parser - it tokenizes the TAP
input I<stream> and produces results. If you need to customize its behaviour
you should probably familiarize yourself with the source first. Enough
lecturing.
Subclass L<TAP::Parser::Grammar> and customize your parser by setting the
C<grammar_class> parameter. See L</new> for more details.
If you need to customize the objects on creation, subclass L<TAP::Parser> and
override L</make_grammar>.
=head1 ACKNOWLEDGEMENTS
All of the following have helped. Bug reports, patches, (im)moral
support, or just words of encouragement have all been forthcoming.
=over 4
=item * Michael Schwern
=item * Andy Lester
=item * chromatic
=item * GEOFFR
=item * Shlomi Fish
=item * Torsten Schoenfeld
=item * Jerry Gay
=item * Aristotle
=item * Adam Kennedy
=item * Yves Orton
=item * Adrian Howard
=item * Sean & Lil
=item * Andreas J. Koenig
=item * Florian Ragwitz
=item * Corion
=item * Mark Stosberg
=item * Matt Kraai
=item * David Wheeler
=item * Alex Vandiver
=back
=head1 AUTHORS
Curtis "Ovid" Poe <ovid@cpan.org>
Andy Armstrong <andy@hexten.net>
Eric Wilhelm @ <ewilhelm at cpan dot org>
Michael Peters <mpeters at plusthree dot com>
Leif Eriksen <leif dot eriksen at bigpond dot com>
Steve Purkis <spurkis@cpan.org>
Nicholas Clark <nick@ccl4.org>
=head1 BUGS
Please report any bugs or feature requests to
C<bug-test-harness@rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Test-Harness>.
We will be notified, and then you'll automatically be notified of
progress on your bug as we make changes.
Obviously, bugs which include patches are best. If you prefer, you can
patch against bleed via an anonymous checkout of the latest version:
svn checkout http://svn.hexten.net/tapx
=head1 COPYRIGHT & LICENSE
Copyright 2006-2008 Curtis "Ovid" Poe, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
1;
package TAP::Formatter::Base;
use strict;
use TAP::Base ();
use POSIX qw(strftime);
use vars qw($VERSION @ISA);
# Upper bound on the number of parse errors summary() lists for one test
# unless the 'errors' option is set.
my $MAX_ERRORS = 5;

# Constructor option name => validator.  A validator is invoked as a method
# on the formatter with the supplied value and returns the value to store.
my %VALIDATION_FOR;

BEGIN {
    @ISA = qw(TAP::Base);

    # Most options are stored exactly as supplied.
    my $accept_as_is = sub { return $_[1] };

    %VALIDATION_FOR = map { $_ => $accept_as_is } qw(
      directives
      verbosity
      normalize
      timer
      failures
      comments
      errors
      color
      jobs
      show_count
    );

    # 'stdout' must be a glob reference or something that can print().
    $VALIDATION_FOR{stdout} = sub {
        my ( $self, $fh ) = @_;
        my $is_glob = ( ref $fh || '' ) eq 'GLOB';
        $self->_croak("option 'stdout' needs a filehandle")
          unless $is_glob
              or eval { $fh->can('print') };
        return $fh;
    };

    # Generate accessors for the private fields and for every option.
    __PACKAGE__->mk_methods(
        qw( _longest _printed_summary_header _colorizer ),
        keys %VALIDATION_FOR
    );
}
=head1 NAME
TAP::Formatter::Base - Base class for harness output delegates
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This provides console orientated output formatting for TAP::Harness.
=head1 SYNOPSIS
use TAP::Formatter::Console;
my $harness = TAP::Formatter::Console->new( \%args );
=cut
# Initialise a formatter from the optional hashref of constructor options.
#
# Each recognised option (the keys of %VALIDATION_FOR) with a defined value
# is validated and stored through its accessor.  Any option left over is
# reported through _croak.  Returns $self.
sub _initialize {
    my ( $self, $arg_for ) = @_;
    $arg_for ||= {};

    $self->SUPER::_initialize($arg_for);
    my %arg_for = %$arg_for;    # force a shallow copy

    $self->verbosity(0);

    for my $name ( keys %VALIDATION_FOR ) {
        my $property = delete $arg_for{$name};
        next unless defined $property;    # undef values are ignored
        my $validate = $VALIDATION_FOR{$name};
        $self->$name( $self->$validate($property) );
    }

    # Sort the leftovers so the error message is deterministic, as
    # TAP::Formatter::Session::_initialize already does.
    if ( my @props = sort keys %arg_for ) {
        $self->_croak(
            "Unknown arguments to " . __PACKAGE__ . "::new (@props)" );
    }

    # Fall back to the real STDOUT when no handle was supplied.
    $self->stdout( \*STDOUT ) unless $self->stdout;

    # The colour delegate is only loaded when colour was requested.
    if ( $self->color ) {
        require TAP::Formatter::Color;
        $self->_colorizer( TAP::Formatter::Color->new );
    }

    return $self;
}
# Verbosity predicates: each compares the formatter's verbosity level
# against a fixed threshold.
sub verbose      { my ($self) = @_; return $self->verbosity >= 1; }
sub quiet        { my ($self) = @_; return $self->verbosity <= -1; }
sub really_quiet { my ($self) = @_; return $self->verbosity <= -2; }
sub silent       { my ($self) = @_; return $self->verbosity <= -3; }
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my %args = (
verbose => 1,
)
my $harness = TAP::Formatter::Console->new( \%args );
The constructor returns a new C<TAP::Formatter::Console> object. If
a L<TAP::Harness> is created with no C<formatter> a
C<TAP::Formatter::Console> is automatically created. If any of the
following options were given to TAP::Harness->new they will be passed to
this constructor which accepts an optional hashref whose allowed keys are:
=over 4
=item * C<verbosity>
Set the verbosity level.
=item * C<verbose>
Printing individual test results to STDOUT.
=item * C<timer>
Append run time for each test to output. Uses L<Time::HiRes> if available.
=item * C<failures>
Show test failures (this is a no-op if C<verbose> is selected).
=item * C<comments>
Show test comments (this is a no-op if C<verbose> is selected).
=item * C<quiet>
Suppressing some test output (mostly failures while tests are running).
=item * C<really_quiet>
Suppressing everything but the tests summary.
=item * C<silent>
Suppressing all output.
=item * C<errors>
If parse errors are found in the TAP output, a note of this will be made
in the summary report. To see all of the parse errors, set this argument to
true:
errors => 1
=item * C<directives>
If set to a true value, only test results with directives will be displayed.
This overrides other settings such as C<verbose>, C<failures>, or C<comments>.
=item * C<stdout>
A filehandle for catching standard output.
=item * C<color>
If defined specifies whether color output is desired. If C<color> is not
defined it will default to color output if color support is available on
the current platform and output is not being redirected.
=item * C<jobs>
The number of concurrent jobs this formatter will handle.
=item * C<show_count>
Boolean value. If false, disables the C<X/Y> test count which shows up while
tests are running.
=back
Any keys for which the value is C<undef> will be ignored.
=cut
# new supplied by TAP::Base
=head3 C<prepare>
Called by Test::Harness before any test output is generated.
This is an advisory and may not be called in the case where tests are
being supplied to Test::Harness by an iterator.
=cut
# Record the length of the longest test name in @tests (0 when the list is
# empty) via the _longest accessor.
sub prepare {
    my ( $self, @tests ) = @_;

    my $widest = 0;
    for my $name (@tests) {
        my $len = length $name;
        $widest = $len if $len > $widest;
    }

    return $self->_longest($widest);
}
# Current local time formatted as "[HH:MM:SS]".
sub _format_now {
    return strftime( "[%H:%M:%S]", localtime() );
}
# Build the line prefix for a test: the name followed by a dotted leader
# sized from the longest known test name, preceded by a timestamp when the
# timer option is on.
sub _format_name {
    my ( $self, $test ) = @_;

    my $dot_count = $self->_longest + 2 - length $test;
    my $leader    = ' ' . ( '.' x $dot_count ) . ' ';

    return "$test$leader" unless $self->timer;

    my $stamp = $self->_format_now();
    return "$stamp $test$leader";
}
=head3 C<open_test>
Called to create a new test session. A test session looks like this:
my $session = $formatter->open_test( $test, $parser );
while ( defined( my $result = $parser->next ) ) {
$session->result($result);
exit 1 if $result->is_bailout;
}
$session->close_test;
=cut
# Placeholder: this base implementation always dies.  The Console and File
# formatters in this file supply their own open_test.
sub open_test {
    die("Unimplemented.");
}
# Emit a success message.  This base version simply forwards to _output.
sub _output_success {
    my $self = shift;
    my ($msg) = @_;
    return $self->_output($msg);
}
=head3 C<summary>
$harness->summary( $aggregate );
C<summary> prints the summary report after all tests are run. The argument is
an aggregate.
=cut
sub summary {
# Print the end-of-run report for $aggregate through the formatter's output
# methods: an optional timestamp, the "All tests successful." line, a
# per-test problem report when anything went wrong, and finally the
# Files/Tests totals and the overall result.  Prints nothing when the
# formatter is silent.
my ( $self, $aggregate ) = @_;
return if $self->silent;
my @t = $aggregate->descriptions;
my $tests = \@t;
my $runtime = $aggregate->elapsed_timestr;
my $total = $aggregate->total;
my $passed = $aggregate->passed;
# With the timer option on, the summary starts with a timestamp line.
if ( $self->timer ) {
$self->_output( $self->_format_now(), "\n" );
}
# TODO: Check this condition still works when all subtests pass but
# the exit status is nonzero
if ( $aggregate->all_passed ) {
$self->_output_success("All tests successful.\n");
}
# ~TODO option where $aggregate->skipped generates reports
# The detailed report is produced when not every test passed or the
# aggregate reports problems.
if ( $total != $passed or $aggregate->has_problems ) {
$self->_output("\nTest Summary Report");
$self->_output("\n-------------------\n");
foreach my $test (@$tests) {
# Reset the flag so the per-test header is printed at most once, and only
# if one of the sections below has something to report for this test.
$self->_printed_summary_header(0);
my ($parser) = $aggregate->parsers($test);
# 'failed' takes a [singular, plural] label pair; 'todo_passed' one label.
$self->_output_summary_failure(
'failed',
[ ' Failed test: ', ' Failed tests: ' ],
$test, $parser
);
$self->_output_summary_failure(
'todo_passed',
" TODO passed: ", $test, $parser
);
# ~TODO this cannot be the default
#$self->_output_summary_failure( 'skipped', " Tests skipped: " );
# The wait status is only reported when the exit status is false.
if ( my $exit = $parser->exit ) {
$self->_summary_test_header( $test, $parser );
$self->_failure_output(" Non-zero exit status: $exit\n");
}
elsif ( my $wait = $parser->wait ) {
$self->_summary_test_header( $test, $parser );
$self->_failure_output(" Non-zero wait status: $wait\n");
}
if ( my @errors = $parser->parse_errors ) {
my $explain;
# Unless the 'errors' option is set, keep only the first $MAX_ERRORS
# parse errors and remember a note saying how many there were in total.
if ( @errors > $MAX_ERRORS && !$self->errors ) {
$explain
= "Displayed the first $MAX_ERRORS of "
. scalar(@errors)
. " TAP syntax errors.\n"
. "Re-run prove with the -p option to see them all.\n";
splice @errors, $MAX_ERRORS;
}
$self->_summary_test_header( $test, $parser );
# The first error shares a line with the label; the rest are indented.
$self->_failure_output(
sprintf " Parse errors: %s\n",
shift @errors
);
foreach my $error (@errors) {
my $spaces = ' ' x 16;
$self->_failure_output("$spaces$error\n");
}
$self->_failure_output($explain) if $explain;
}
}
}
# @$tests in scalar (numeric) context: the number of test descriptions.
my $files = @$tests;
$self->_output("Files=$files, Tests=$total, $runtime\n");
my $status = $aggregate->get_status;
$self->_output("Result: $status\n");
}
# Report the test numbers returned by $parser->$method for one test.
#
# $name is either a single label or an array ref of [singular, plural]
# labels.  Nothing is printed when the parser returns no numbers.  Results
# for 'failed' go through _failure_output, everything else through _output.
sub _output_summary_failure {
    my ( $self, $method, $name, $test, $parser ) = @_;

    # ugly hack. Must rethink this :(
    my $emit = $method eq 'failed' ? '_failure_output' : '_output';

    my @numbers = $parser->$method();
    return unless @numbers;

    $self->_summary_test_header( $test, $parser );

    my ( $singular, $plural )
      = ref($name) eq 'ARRAY' ? @$name : ( $name, $name );
    $self->$emit( @numbers == 1 ? $singular : $plural );

    # The first line of ranges follows the label; later lines are indented.
    my @lines = $self->_balanced_range( 40, @numbers );
    my $first = shift @lines;
    $self->$emit( sprintf "%s\n", $first );

    my $indent = ' ' x 16;
    for my $line (@lines) {
        $self->$emit( sprintf "$indent%s\n", $line );
    }
    return;
}
# Print the one-line header for $test in the summary report, at most once
# per test: nothing happens when _printed_summary_header is already true,
# and the flag is set after printing.
#
# The line holds the test name, padding derived from the longest known test
# name (at least one space), and the parser's wait status, number of tests
# run and number of failures.  It is sent through the output method chosen
# by _get_output_method.
sub _summary_test_header {
    my ( $self, $test, $parser ) = @_;
    return if $self->_printed_summary_header;

    my $spaces = ' ' x ( $self->_longest - length $test );
    $spaces = ' ' unless $spaces;

    my $output = $self->_get_output_method($parser);

    # The test name is passed as a sprintf ARGUMENT rather than interpolated
    # into the format, so a name containing '%' is printed literally.
    $self->$output(
        sprintf "%s%s(Wstat: %d Tests: %d Failed: %d)\n",
        $test, $spaces,
        $parser->wait, $parser->tests_run, scalar $parser->failed
    );

    $self->_printed_summary_header(1);
}
# Print the given strings to the formatter's stdout handle.
sub _output {
    my ( $self, @chunks ) = @_;
    my $fh = $self->stdout;
    print {$fh} @chunks;
}
# Emit failure text.  This base version simply forwards to _output.
sub _failure_output {
    my ( $self, @message ) = @_;
    return $self->_output(@message);
}
# Collapse @range with _range and pack the resulting items into lines of
# comma-separated text.  A line is closed once its accumulated width
# (counting the ", " separators) has reached $limit.  Returns the lines.
sub _balanced_range {
    my ( $self, $limit, @range ) = @_;

    my @pending = $self->_range(@range);

    my @lines;
    my $current = "";
    my $width   = 0;

    while (@pending) {
        if ( $width >= $limit ) {

            # Line is full: drop the trailing separator and start afresh.
            $current =~ s/, $//;
            push @lines, $current;
            $current = '';
            $width   = 0;
            next;
        }

        my $piece = ( shift @pending ) . ", ";
        $current .= $piece;
        $width += length $piece;
    }

    # Flush whatever is left on the last, partially filled line.
    if ($current) {
        $current =~ s/, $//;
        push @lines, $current;
    }

    return @lines;
}
# Collapse a list of numbers into ranges: runs of consecutive values become
# "first-last", isolated values are returned unchanged.  The input is sorted
# numerically first.
sub _range {
    my ( $self, @numbers ) = @_;

    # shouldn't be needed, but subclasses might call this
    @numbers = sort { $a <=> $b } @numbers;

    my @range;
    my $run_start;    # first value of the run in progress, if any

    for my $idx ( 0 .. $#numbers ) {
        my $current   = $numbers[$idx];
        my $following = $numbers[ $idx + 1 ];

        if ( defined $following && $following == $current + 1 ) {

            # The run continues; remember where it began.
            $run_start = $current unless defined $run_start;
            next;
        }

        if ( defined $run_start ) {
            push @range, "$run_start-$current";
            undef $run_start;
        }
        else {
            push @range, $current;
        }
    }

    return @range;
}
# Name of the output method to use for $parser: '_failure_output' when the
# parser reports problems, '_output' otherwise.
sub _get_output_method {
    my ( $self, $parser ) = @_;
    return '_failure_output' if $parser->has_problems;
    return '_output';
}
1;
package TAP::Formatter::Color;
use strict;
use vars qw($VERSION @ISA);
use constant IS_WIN32 => ( $^O =~ /^(MS)?Win32$/ );
@ISA = qw(TAP::Object);

# False when a colour backend was loaded; otherwise the error from the
# failed load attempt.
my $NO_COLOR;

# Install set_color($output, $color) at compile time.  Which implementation
# is installed depends on the platform and on whether the colour module
# could be loaded; when it could not, set_color is a no-op.
BEGIN {
    $NO_COLOR = 0;

    if (IS_WIN32) {
        eval 'use Win32::Console';
        if ($@) {
            $NO_COLOR = $@;
        }
        else {
            my $console = Win32::Console->new( STD_OUTPUT_HANDLE() );

            # eval here because we might not know about these variables
            my $fg = eval '$FG_LIGHTGRAY';
            my $bg = eval '$BG_BLACK';

            *set_color = sub {
                my ( $self, $output, $color ) = @_;

                if ( $color eq 'reset' ) {
                    $fg = eval '$FG_LIGHTGRAY';
                    $bg = eval '$BG_BLACK';
                }
                elsif ( $color =~ /^on_(.+)$/ ) {
                    $bg = eval '$BG_' . uc($1);
                }
                else {
                    $fg = eval '$FG_' . uc($color);
                }

                # In case of colors that aren't defined, fall back to the
                # default attributes.  The fallback must pass $output along
                # so that 'reset' arrives as the colour; without it the
                # colour is undef and the call recurses without end.  If
                # 'reset' itself cannot be resolved there is nothing
                # sensible left to apply.
                unless ( defined $bg && defined $fg ) {
                    return if $color eq 'reset';
                    return $self->set_color( $output, 'reset' );
                }

                $console->Attr( $bg | $fg );
            };
        }
    }
    else {
        eval 'use Term::ANSIColor';
        if ($@) {
            $NO_COLOR = $@;
        }
        else {

            # Hand the escape sequence for $color to the output callback.
            *set_color = sub {
                my ( $self, $output, $color ) = @_;
                $output->( color($color) );
            };
        }
    }

    if ($NO_COLOR) {
        *set_color = sub { };
    }
}
=head1 NAME
TAP::Formatter::Color - Run Perl test scripts with color
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
Note that this harness is I<experimental>. You may not like the colors I've
chosen and I haven't yet provided an easy way to override them.
This test harness is the same as L<TAP::Harness>, but test results are output
in color. Passing tests are printed in green. Failing tests are in red.
Skipped tests are white on a blue background and TODO tests are printed in
yellow.
If L<Term::ANSIColor> cannot be found (or L<Win32::Console> if running
under Windows) tests will be run without color.
=head1 SYNOPSIS
use TAP::Formatter::Color;
my $harness = TAP::Formatter::Color->new( \%args );
$harness->runtests(@tests);
=head1 METHODS
=head2 Class Methods
=head3 C<new>
The constructor returns a new C<TAP::Formatter::Color> object. If
L<Term::ANSIColor> is not installed, returns undef.
=cut
# new() implementation supplied by TAP::Object
# Finish construction.  Returns $self when colour support is available;
# otherwise warns with a shortened form of the load error and returns
# nothing.
sub _initialize {
    my ($self) = @_;

    return $self unless $NO_COLOR;

    # shorten that message a bit
    my $error = $NO_COLOR;
    $error =~ s/ in \@INC .*//s;
    warn "Note: Cannot run tests in color: $error\n";

    return;    # abort object construction
}
##############################################################################
=head3 C<can_color>
TAP::Formatter::Color->can_color()
Returns a boolean indicating whether or not this module can actually
generate colored output. This will be false if it could not load the
modules needed for the current platform.
=cut
# True when no load error was recorded in $NO_COLOR.
sub can_color {
    my $have_color = !$NO_COLOR;
    return $have_color;
}
=head3 C<set_color>
Set the output color.
=cut
1;
package TAP::Formatter::Console;
use strict;
use TAP::Formatter::Base ();
use POSIX qw(strftime);
use vars qw($VERSION @ISA);
@ISA = qw(TAP::Formatter::Base);
=head1 NAME
TAP::Formatter::Console - Harness output delegate for default console output
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This provides console orientated output formatting for TAP::Harness.
=head1 SYNOPSIS
use TAP::Formatter::Console;
my $harness = TAP::Formatter::Console->new( \%args );
=head2 C<< open_test >>
See L<TAP::Formatter::Base>
=cut
# Create, announce and return a session object for one test.  The parallel
# session class is used when the formatter handles more than one job,
# the plain console session class otherwise.  A failure to load the class
# is reported through _croak.
sub open_test {
    my ( $self, $test, $parser ) = @_;

    my $class = 'TAP::Formatter::Console::Session';
    $class = 'TAP::Formatter::Console::ParallelSession'
      if $self->jobs > 1;

    eval "require $class";
    $self->_croak($@) if $@;

    my $session_args = {
        name       => $test,
        formatter  => $self,
        parser     => $parser,
        show_count => $self->show_count,
    };
    my $session = $class->new($session_args);

    $session->header;

    return $session;
}
# Use _colorizer delegate to set output color. NOP if we have no delegate
#
# Each colour in @colors is passed to the delegate's set_color together with
# a callback that writes through this formatter's _output.  A callback
# already stored in $self->{_output_func} is honoured, but the default one
# is deliberately NOT cached on the object: it closes over $self, so storing
# it in $self would create a reference cycle that keeps the formatter alive.
sub _set_colors {
    my ( $self, @colors ) = @_;

    my $colorizer = $self->_colorizer;
    return unless $colorizer;

    my $output_func = $self->{_output_func} || sub {
        $self->_output(@_);
    };

    $colorizer->set_color( $output_func, $_ ) for @colors;
    return;
}
# Print a success message in green, then reset the colour.
sub _output_success {
    my $self = shift;
    my ($msg) = @_;

    $self->_set_colors('green');
    $self->_output($msg);
    $self->_set_colors('reset');
}
# Print failure text in red.  A trailing newline is held back and printed
# only after the colour has been reset.
sub _failure_output {
    my ( $self, @parts ) = @_;

    my $text        = join '', @parts;
    my $had_newline = chomp $text;

    $self->_set_colors('red');
    $self->_output($text);
    $self->_set_colors('reset');

    $self->_output($/)
      if $had_newline;
}
1;
package TAP::Formatter::File;
use strict;
use TAP::Formatter::Base ();
use TAP::Formatter::File::Session;
use POSIX qw(strftime);
use vars qw($VERSION @ISA);
@ISA = qw(TAP::Formatter::Base);
=head1 NAME
TAP::Formatter::File - Harness output delegate for file output
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This provides file orientated output formatting for TAP::Harness.
=head1 SYNOPSIS
use TAP::Formatter::File;
my $harness = TAP::Formatter::File->new( \%args );
=head2 C<< open_test >>
See L<TAP::Formatter::Base>
=cut
# Create a TAP::Formatter::File::Session for one test, call its header
# method and return it.
sub open_test {
    my ( $self, $test, $parser ) = @_;

    my %session_args = (
        name      => $test,
        formatter => $self,
        parser    => $parser,
    );
    my $session = TAP::Formatter::File::Session->new( \%session_args );

    $session->header;

    return $session;
}
# Always false for this formatter.
sub _should_show_count { 0 }
1;
package TAP::Formatter::Session;
use strict;
use TAP::Base;
use vars qw($VERSION @ISA);
@ISA = qw(TAP::Base);
# Names of the per-session fields.
my @ACCESSOR;

# Generate one getter per field.  Each returns the hash slot of the same
# name; arguments passed to a getter are ignored.
BEGIN {
    @ACCESSOR = qw( name formatter parser show_count );

    no strict 'refs';
    for my $field (@ACCESSOR) {
        *{$field} = sub {
            my $self = shift;
            return $self->{$field};
        };
    }
}
=head1 NAME
TAP::Formatter::Session - Abstract base class for harness output delegate
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my %args = (
formatter => $self,
)
my $harness = TAP::Formatter::Console::Session->new( \%args );
The constructor returns a new C<TAP::Formatter::Console::Session> object.
=over 4
=item * C<formatter>
=item * C<parser>
=item * C<name>
=item * C<show_count>
=back
=cut
# Initialise a session from the optional hashref of constructor arguments.
#
# The fields named in @ACCESSOR are copied onto the object.  show_count
# defaults to true, and when true is replaced by the answer from
# _should_show_count.  Any other argument is reported through _croak.
# Returns $self.
sub _initialize {
    my ( $self, $arg_for ) = @_;
    $arg_for ||= {};

    $self->SUPER::_initialize($arg_for);

    my %remaining = %$arg_for;    # force a shallow copy
    $self->{$_} = delete $remaining{$_} for @ACCESSOR;

    # defaults to true
    $self->{show_count} = 1
      unless defined $self->show_count;

    # but may be a damned lie!
    $self->{show_count} = $self->_should_show_count
      if $self->show_count;

    my @unknown = sort keys %remaining;
    if (@unknown) {
        $self->_croak(
            "Unknown arguments to " . __PACKAGE__ . "::new (@unknown)" );
    }

    return $self;
}
=head3 C<header>
Output test preamble
=head3 C<result>
Called by the harness for each line of TAP it receives.
=head3 C<close_test>
Called to close a test session.
=head3 C<clear_for_close>
Called by C<close_test> to clear the line showing test progress, or the parallel
test ruler, prior to printing the final test result.
=cut
# Default session hooks: each does nothing and returns nothing.
sub header          { return; }
sub result          { return; }
sub close_test      { return; }
sub clear_for_close { return; }
# Decide whether the running test count should be shown: only when the
# formatter is not verbose, its stdout passes the -t file test, and the
# HARNESS_NOTTY environment variable is not true.
sub _should_show_count {
    my ($self) = @_;
    my $formatter = $self->formatter;

    return !$formatter->verbose
      && -t $formatter->stdout
      && !$ENV{HARNESS_NOTTY};
}
# Text to print for $result: its as_string form when the formatter's
# normalize option is on, otherwise its raw form.
sub _format_for_output {
    my ( $self, $result ) = @_;

    if ( $self->formatter->normalize ) {
        return $result->as_string;
    }
    return $result->raw;
}
# Print the failure report for one test through the formatter: a "Dubious"
# line when the exit status is true, the failed/total subtest line, and
# notes about skipped subtests and TODO tests that passed.  Prints nothing
# when the formatter is really_quiet.
sub _output_test_failure {
    my ( $self, $parser ) = @_;
    my $formatter = $self->formatter;
    return if $formatter->really_quiet;

    my $tests_run     = $parser->tests_run;
    my $tests_planned = $parser->tests_planned;

    # Without a plan, the number of tests actually run is the total.
    my $total
      = defined $tests_planned
      ? $tests_planned
      : $tests_run;

    my $passed = $parser->passed;

    # The total number of fails includes any tests that were planned but
    # didn't run
    my $failed = $parser->failed + $total - $tests_run;

    if ( my $exit = $parser->exit ) {
        my $wstat = $parser->wait;
        my $status = sprintf( "%d (wstat %d, 0x%x)", $exit, $wstat, $wstat );
        $formatter->_failure_output("Dubious, test returned $status\n");
    }

    if ( $failed == 0 ) {
        $formatter->_failure_output(
            $total
            ? "All $total subtests passed "
            : 'No subtests run '
        );
    }
    else {
        $formatter->_failure_output("Failed $failed/$total subtests ");
        if ( !$total ) {
            $formatter->_failure_output("\nNo tests run!");
        }
    }

    if ( my $skipped = $parser->skipped ) {
        $passed -= $skipped;
        my $test = 'subtest' . ( $skipped != 1 ? 's' : '' );
        $formatter->_output(
            "\n\t(less $skipped skipped $test: $passed okay)");
    }

    # Named separately from $failed so the failure count above is not
    # shadowed.
    if ( my $todo_passed = $parser->todo_passed ) {
        my $test = $todo_passed > 1 ? 'tests' : 'test';
        $formatter->_output(
            "\n\t($todo_passed TODO $test unexpectedly succeeded)");
    }

    $formatter->_output("\n");
}
1;
package TAP::Formatter::Console::ParallelSession;
use strict;
use File::Spec;
use File::Path;
use TAP::Formatter::Console::Session;
use Carp;
use constant WIDTH => 72; # Because Eric says
use vars qw($VERSION @ISA);
@ISA = qw(TAP::Formatter::Console::Session);
my %shared;
# Initialise a parallel session and register it in the list of active
# sessions kept in the context shared by all sessions of the same formatter.
sub _initialize {
    my ( $self, $arg_for ) = @_;

    $self->SUPER::_initialize($arg_for);

    # Horrid bodge. This creates our shared context per harness. Maybe
    # TAP::Harness should give us this?
    my $formatter = $self->formatter;
    $shared{$formatter} ||= $self->_create_shared_context;

    push @{ $shared{$formatter}{active} }, $self;

    return $self;
}
# Return a fresh shared context: an empty list of active sessions and
# zeroed 'tests' and 'fails' counters.
sub _create_shared_context {
    my $self = shift;

    my %context = (
        active => [],
        tests  => 0,
        fails  => 0,
    );

    return \%context;
}
=head1 NAME
TAP::Formatter::Console::ParallelSession - Harness output delegate for parallel
console output
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This provides console orientated output formatting for L<TAP::Harness>
when run with multiple L<TAP::Harness/jobs>.
=head1 SYNOPSIS
=cut
=head1 METHODS
=head2 Class Methods
=head3 C<header>
Output test preamble
=cut
# Deliberately prints nothing.
sub header {
    return;
}
# Overwrite the ruler line with WIDTH spaces and return the cursor to the
# start of the line.
sub _clear_ruler {
    my ($self) = @_;
    my $blank = ' ' x WIDTH;
    $self->formatter->_output( "\r" . $blank . "\r" );
}
# State shared by every call to _output_ruler.
my $now = 0;    # time of the last ruler update
my $start;      # time of the first ruler update
my $trailer     = '... )===';
my $chop_length = WIDTH - length($trailer);

# Draw the progress ruler: the shared test count, the seconds since the
# first update, and "run/planned" for each active session.  The ruler is
# redrawn only when the clock has moved on since the last update, unless
# $refresh is true.  It is padded with '=' to WIDTH columns, or cut and
# given a "..." trailer when it would be wider.
sub _output_ruler {
    my ( $self, $refresh ) = @_;

    my $tick = time;
    return unless $refresh or $tick != $now;

    $now = $tick;
    $start ||= $now;

    my $formatter = $self->formatter;
    return if $formatter->really_quiet;

    my $context = $shared{$formatter};

    my $ruler = sprintf '===( %7d;%d ', $context->{tests}, $now - $start;

    for my $session ( @{ $context->{active} } ) {
        my $parser  = $session->parser;
        my $done    = $parser->tests_run;
        my $planned = $parser->tests_planned || '?';
        my $width   = length $planned;

        $ruler .= sprintf "%${width}d/$planned ", $done;
    }

    chop $ruler;    # Remove a trailing space
    $ruler .= ')===';

    if ( length($ruler) <= WIDTH ) {
        $ruler .= '=' x ( WIDTH - length($ruler) );
    }
    else {
        $ruler =~ s/(.{$chop_length}).*/$1$trailer/o;
    }

    $formatter->_output("\r$ruler");
}
=head3 C<result>
Called by the harness for each line of TAP it receives.
=cut
# Handle one parsed result.  Test results bump the shared test counter and
# then either go to the serial session's result method (when this is the
# only active session) or refresh the ruler.  A bailout is reported through
# the formatter's failure output.  Other results are ignored.
sub result {
    my ( $self, $result ) = @_;
    my $formatter = $self->formatter;

    if ( !$result->is_test ) {
        return unless $result->is_bailout;
        return $formatter->_failure_output(
                "Bailout called. Further testing stopped: "
              . $result->explanation
              . "\n" );
    }

    my $context = $shared{$formatter};
    $context->{tests}++;

    # There is only one test, so use the serial output format.
    return $self->SUPER::result($result)
      if @{ $context->{active} } == 1;

    # Force a redraw on the first test this session's parser has run.
    $self->_output_ruler( $self->parser->tests_run == 1 );
}
=head3 C<clear_for_close>
=cut
# Clear the progress display before the final result is printed: the serial
# session's line when this is the only active session, the ruler otherwise.
# Does nothing when the formatter is really_quiet.
sub clear_for_close {
    my ($self) = @_;

    my $formatter = $self->formatter;
    return if $formatter->really_quiet;

    my $context      = $shared{$formatter};
    my $active_count = @{ $context->{active} };

    return $active_count == 1
      ? $self->SUPER::clear_for_close
      : $self->_clear_ruler;
}
=head3 C<close_test>
=cut
# Close this session: run the serial close_test, remove the session from
# the shared list of active sessions (matched by name), then redraw the
# ruler, print the header of the one remaining session, or drop the shared
# context when no session is left.
sub close_test {
    my $self      = shift;
    my $name      = $self->name;
    my $parser    = $self->parser;
    my $formatter = $self->formatter;
    my $context   = $shared{$formatter};

    $self->SUPER::close_test;

    my $active = $context->{active};

    my ($index) = grep { $active->[$_]->name eq $name } 0 .. $#$active;
    die "Can't find myself" unless defined $index;
    splice @$active, $index, 1;

    my $remaining = @$active;
    if ( $remaining > 1 ) {
        $self->_output_ruler(1);
    }
    elsif ( $remaining == 1 ) {

        # Print out "test/name.t ...."
        $active->[0]->SUPER::header;
    }
    else {
        delete $shared{$formatter};
    }
}
1;
package TAP::Formatter::Console::Session;
use strict;
use TAP::Formatter::Session;
use vars qw($VERSION @ISA);
@ISA = qw(TAP::Formatter::Session);
my @ACCESSOR;

# Install header, result, clear_for_close and close_test as thin
# dispatchers.  Each one builds the closure table with _closures on first
# use, caches it in $self->{_closures}, and then calls the closure of the
# same name with the remaining arguments.
BEGIN {
    no strict 'refs';
    for my $method (qw( header result clear_for_close close_test )) {
        *{$method} = sub {
            my $self = shift;
            my $closures = $self->{_closures} ||= $self->_closures;
            return $closures->{$method}->(@_);
        };
    }
}
=head1 NAME
TAP::Formatter::Console::Session - Harness output delegate for default console output
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This provides console orientated output formatting for TAP::Harness.
=cut
# Return a code ref that prints one result through the formatter.
#
# Without a colour delegate the code ref just prints the formatted result.
# With one, it first applies the colours of the first matching rule (failed
# test, skipped test, TODO test - in that order), prints the result, and
# then resets the colour.
sub _get_output_result {
    my $self = shift;

    # Each rule: a predicate on the result followed by its colours.
    my @rules = (
        [ sub { $_[0]->is_test && !$_[0]->is_ok }, 'red' ],
        [ sub { $_[0]->is_test && $_[0]->has_skip }, 'white', 'on_blue' ],
        [ sub { $_[0]->is_test && $_[0]->has_todo }, 'yellow' ],
    );

    my $formatter = $self->formatter;
    my $parser    = $self->parser;

    my $print_result = sub {
        $formatter->_output( $self->_format_for_output(shift) );
    };

    return $print_result unless $formatter->_colorizer;

    return sub {
        my $result = shift;

        for my $rule (@rules) {
            my ( $applies, @colors ) = @$rule;
            next unless $applies->($result);
            $formatter->_set_colors(@colors);
            last;
        }

        $print_result->($result);
        $formatter->_set_colors('reset');
    };
}
sub _closures {
# Build and return the hash of closures (header, result, clear_for_close,
# close_test) that implement this session.  The closures share the lexical
# variables declared below, so formatter settings are read once here rather
# than on every call.
my $self = shift;
my $parser = $self->parser;
my $formatter = $self->formatter;
my $pretty = $formatter->_format_name( $self->name );
my $show_count = $self->show_count;
my $really_quiet = $formatter->really_quiet;
my $quiet = $formatter->quiet;
my $verbose = $formatter->verbose;
my $directives = $formatter->directives;
my $failures = $formatter->failures;
my $comments = $formatter->comments;
my $output_result = $self->_get_output_result;
# Mutable state shared between the closures:
#   $output              - name of the formatter method used for progress
#                          output; updated by 'result'
#   $plan                - "/N " suffix for the running count, set on the
#                          first result
#   $newline_printed     - true once the progress line has been ended so
#                          result lines start on a line of their own
#   $last_status_printed - time of the last progress update
my $output = '_output';
my $plan = '';
my $newline_printed = 0;
my $last_status_printed = 0;
return {
# Print the formatted test name unless really_quiet.
header => sub {
$formatter->_output($pretty)
unless $really_quiet;
},
# Handle one parsed result: report bailouts, update the running count and
# print the result line when the formatter settings ask for it.
result => sub {
my $result = shift;
# Bailouts are reported even when really_quiet.
if ( $result->is_bailout ) {
$formatter->_failure_output(
"Bailout called. Further testing stopped: "
. $result->explanation
. "\n" );
}
return if $really_quiet;
my $is_test = $result->is_test;
# These are used in close_test - but only if $really_quiet
# is false - so it's safe to only set them here unless that
# relationship changes.
if ( !$plan ) {
my $planned = $parser->tests_planned || '?';
$plan = "/$planned ";
}
$output = $formatter->_get_output_method($parser);
if ( $show_count and $is_test ) {
my $number = $result->number;
my $now = CORE::time;
# Print status roughly once per second.
# We will always get the first number as a side effect of
# $last_status_printed starting with the value 0, which $now
# will never be. (Unless someone sets their clock to 1970)
if ( $last_status_printed != $now ) {
$formatter->$output("\r$pretty$number$plan");
$last_status_printed = $now;
}
}
# A result line is printed unless quiet, and only when verbose or when the
# result matches one of the enabled categories (failures, comments,
# directives).
if (!$quiet
&& ( $verbose
|| ( $is_test && $failures && !$result->is_ok )
|| ( $comments && $result->is_comment )
|| ( $directives && $result->has_directive ) )
)
{
unless ($newline_printed) {
$formatter->_output("\n");
$newline_printed = 1;
}
$output_result->($result);
$formatter->_output("\n");
}
},
# Blank out the progress line: a carriage return followed by as many spaces
# as the test name, plan suffix and test count occupy, plus one.
clear_for_close => sub {
my $spaces
= ' ' x length( '.' . $pretty . $plan . $parser->tests_run );
$formatter->$output("\r$spaces");
},
# Print the final status line for the test.
close_test => sub {
if ( $show_count && !$really_quiet ) {
$self->clear_for_close;
$formatter->$output("\r$pretty");
}
# Avoid circular references
# NOTE(review): the parser accessor generated in TAP::Formatter::Session
# ignores its arguments, so this call appears not to clear the stored
# parser - confirm.  The closures keep using the $parser captured above.
$self->parser(undef);
$self->{_closures} = {};
return if $really_quiet;
if ( my $skip_all = $parser->skip_all ) {
$formatter->_output("skipped: $skip_all\n");
}
elsif ( $parser->has_problems ) {
$self->_output_test_failure($parser);
}
else {
my $time_report = '';
if ( $formatter->timer ) {
my $start_time = $parser->start_time;
my $end_time = $parser->end_time;
if ( defined $start_time and defined $end_time ) {
my $elapsed = $end_time - $start_time;
# Milliseconds when the clock is high resolution, otherwise whole seconds
# with '<1' standing in for a zero elapsed time.
$time_report
= $self->time_is_hires
? sprintf( ' %8d ms', $elapsed * 1000 )
: sprintf( ' %8s s', $elapsed || '<1' );
}
}
$formatter->_output("ok$time_report\n");
}
},
};
}
=head2 C<< clear_for_close >>
=head2 C<< close_test >>
=head2 C<< header >>
=head2 C<< result >>
=cut
1;
package TAP::Formatter::File::Session;
use strict;
use TAP::Formatter::Session;
use vars qw($VERSION @ISA);
@ISA = qw(TAP::Formatter::Session);
=head1 NAME
TAP::Formatter::File::Session - Harness output delegate for file output
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This provides file orientated output formatting for L<TAP::Harness>.
It is particularly important when running with parallel tests, as it
ensures that test results are not interleaved, even when run
verbosely.
=cut
=head1 METHODS
=head2 result
Stores results for later output, all together.
=cut
# Handle one parsed result.  A bailout is reported immediately through the
# formatter.  Any other result is appended to $self->{results}, one per
# line, when the formatter is not quiet and is either verbose or has the
# matching category (failures, comments, directives) enabled.
sub result {
    my ( $self, $result ) = @_;

    my $parser    = $self->parser;
    my $formatter = $self->formatter;

    if ( $result->is_bailout ) {
        $formatter->_failure_output(
                "Bailout called. Further testing stopped: "
              . $result->explanation
              . "\n" );
        return;
    }

    return if $formatter->quiet;

    my $wanted
      = $formatter->verbose
      || ( $result->is_test && $formatter->failures && !$result->is_ok )
      || ( $formatter->comments   && $result->is_comment )
      || ( $result->has_directive && $formatter->directives );

    $self->{results} .= $self->_format_for_output($result) . "\n"
      if $wanted;
    return;
}
=head2 close_test
When the test file finishes, outputs the summary, together.
=cut
# Print everything collected for this test in one go: the formatted test
# name, any stored result lines, and the final status ("skipped: ...", the
# failure report, or "ok" with an optional elapsed time).  Prints nothing
# when the formatter is really_quiet.
sub close_test {
    my $self = shift;

    # Take our own reference to the parser BEFORE asking the session to let
    # go of it.  Fetching it afterwards only works for as long as the
    # accessor ignores its argument, as the one generated in
    # TAP::Formatter::Session does.
    my $parser    = $self->parser;
    my $formatter = $self->formatter;

    # Avoid circular references
    $self->parser(undef);

    my $pretty = $formatter->_format_name( $self->name );

    return if $formatter->really_quiet;

    if ( my $skip_all = $parser->skip_all ) {
        $formatter->_output( $pretty . "skipped: $skip_all\n" );
    }
    elsif ( $parser->has_problems ) {
        $formatter->_output(
            $pretty . ( $self->{results} ? "\n" . $self->{results} : "\n" ) );
        $self->_output_test_failure($parser);
    }
    else {
        my $time_report = '';
        if ( $formatter->timer ) {
            my $start_time = $parser->start_time;
            my $end_time   = $parser->end_time;
            if ( defined $start_time and defined $end_time ) {
                my $elapsed = $end_time - $start_time;

                # Milliseconds when the clock is high resolution, otherwise
                # whole seconds with '<1' standing in for zero.
                $time_report
                  = $self->time_is_hires
                  ? sprintf( ' %8d ms', $elapsed * 1000 )
                  : sprintf( ' %8s s', $elapsed || '<1' );
            }
        }

        $formatter->_output( $pretty
              . ( $self->{results} ? "\n" . $self->{results} : "" )
              . "ok$time_report\n" );
    }
}
1;
package TAP::Parser::Aggregator;
use strict;
use Benchmark;
use vars qw($VERSION @ISA);
use TAP::Object ();
@ISA = qw(TAP::Object);
=head1 NAME
TAP::Parser::Aggregator - Aggregate TAP::Parser results
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Aggregator;
my $aggregate = TAP::Parser::Aggregator->new;
$aggregate->add( 't/00-load.t', $load_parser );
$aggregate->add( 't/10-lex.t', $lex_parser );
my $summary = <<'END_SUMMARY';
Passed: %s
Failed: %s
Unexpectedly succeeded: %s
END_SUMMARY
printf $summary,
scalar $aggregate->passed,
scalar $aggregate->failed,
scalar $aggregate->todo_passed;
=head1 DESCRIPTION
C<TAP::Parser::Aggregator> collects parser objects and allows
reporting/querying their aggregate results.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $aggregate = TAP::Parser::Aggregator->new;
Returns a new C<TAP::Parser::Aggregator> object.
=cut
# new() implementation supplied by TAP::Object
# Maps each summary name kept by the aggregator to the parser method that
# supplies its per-file count.
my %SUMMARY_METHOD_FOR;

BEGIN {    # install summary methods

    # Most summaries are fed by a parser method of the same name ...
    my @same_named = qw(
      failed
      parse_errors
      passed
      skipped
      todo
      todo_passed
      total
      wait
      exit
    );
    @SUMMARY_METHOD_FOR{@same_named} = @same_named;

    # ... except these two, whose parser methods are named differently.
    $SUMMARY_METHOD_FOR{total}   = 'tests_run';
    $SUMMARY_METHOD_FOR{planned} = 'tests_planned';

    # Generate one accessor per summary: the running count in scalar
    # context, the contributing descriptions in list context. 'total' is
    # skipped here because it has a hand-written accessor.
    for my $name ( keys %SUMMARY_METHOD_FOR ) {
        if ( 'total' eq $name ) {
            next;
        }
        no strict 'refs';
        *{$name} = sub {
            my ($self) = @_;
            return @{ $self->{"descriptions_for_$name"} } if wantarray;
            return $self->{$name};
        };
    }
}    # end install summary methods
# Set up an empty aggregator: no parsers, no recorded order, every summary
# counter at zero and (for everything except 'total') an empty list of
# contributing descriptions. Returns $self.
sub _initialize {
    my ($self) = @_;

    @{$self}{qw( parser_for parse_order )} = ( {}, [] );

    for my $name ( keys %SUMMARY_METHOD_FOR ) {
        $self->{$name} = 0;
        $self->{"descriptions_for_$name"} = [] unless 'total' eq $name;
    }

    return $self;
}
##############################################################################
=head2 Instance Methods
=head3 C<add>
$aggregate->add( $description => $parser );
The C<$description> is usually a test file name (but only by
convention.) It is used as a unique identifier (see e.g.
L<"parsers">.) Reusing a description is a fatal error.
The C<$parser> is a L<TAP::Parser|TAP::Parser> object.
=cut
# Register $parser under $description and fold its counts into the
# aggregate totals. Croaks (via _croak) if $description was already used.
# Returns $self.
sub add {
    my ( $self, $description, $parser ) = @_;
    if ( exists $self->{parser_for}{$description} ) {
        $self->_croak( "You already have a parser for ($description)."
              . " Perhaps you have run the same test twice." );
    }
    push @{ $self->{parse_order} } => $description;
    $self->{parser_for}{$description} = $parser;

    # Iterate over keys() rather than each(): the hash is shared by every
    # aggregator, and an exception thrown by a parser method in the middle
    # of an each() loop would leave its iterator half-way, making the next
    # add() silently skip the summaries already visited.
    for my $summary ( keys %SUMMARY_METHOD_FOR ) {
        my $method = $SUMMARY_METHOD_FOR{$summary};

        # Slightly nasty. Instead we should maybe have 'cooked' accessors
        # for results that may be masked by the parser.
        next
          if ( $method eq 'exit' || $method eq 'wait' )
          && $parser->ignore_exit;

        if ( my $count = $parser->$method() ) {
            $self->{$summary} += $count;
            push @{ $self->{"descriptions_for_$summary"} } => $description;
        }
    }
    return $self;
}
##############################################################################
=head3 C<parsers>
my $count = $aggregate->parsers;
my @parsers = $aggregate->parsers;
my @parsers = $aggregate->parsers(@descriptions);
In scalar context without arguments, this method returns the number of parsers
aggregated. In list context without arguments, returns the parsers in the
order they were added.
If C<@descriptions> is given, these correspond to the keys used in each
call to the add() method. Returns an array of the requested parsers (in
the requested order) in list context or an array reference in scalar
context.
Requesting an unknown identifier is a fatal error.
=cut
# With arguments, look up the named parsers (see _get_parsers). Without
# arguments, return every parser in the order it was added; in scalar
# context that yields the number of parsers.
sub parsers {
    my $self = shift;
    if (@_) {
        return $self->_get_parsers(@_);
    }

    # Note: the parsers must land in a real array before being returned so
    # that scalar context gives the count, as documented.
    my @in_order = map { $self->{parser_for}{$_} } @{ $self->{parse_order} };
    return @in_order;
}
# Return the parsers registered under @descriptions, in the order asked
# for: a list in list context, an array reference otherwise. Croaks (via
# _croak) on the first description that has no parser.
sub _get_parsers {
    my ( $self, @descriptions ) = @_;

    my @found;
    for my $wanted (@descriptions) {
        if ( !exists $self->{parser_for}{$wanted} ) {
            $self->_croak("A parser for ($wanted) could not be found");
        }
        push @found, $self->{parser_for}{$wanted};
    }

    return @found if wantarray;
    return \@found;
}
=head3 C<descriptions>
Get an array of descriptions in the order in which they were added to
the aggregator.
=cut
# Descriptions in the order they were added; empty if nothing is recorded.
sub descriptions {
    my $self  = shift;
    my $order = $self->{parse_order} || [];
    return @$order;
}
=head3 C<start>
Call C<start> immediately before adding any results to the aggregator.
Among other things it records the start time for the test run.
=cut
# Record the moment the test run starts as a Benchmark object, which is
# also the return value.
sub start {
    my ($self) = @_;
    my $now = Benchmark->new;
    return $self->{start_time} = $now;
}
=head3 C<stop>
Call C<stop> immediately after adding all test results to the aggregator.
=cut
# Record the moment the test run ends as a Benchmark object, which is also
# the return value.
sub stop {
    my ($self) = @_;
    my $now = Benchmark->new;
    return $self->{end_time} = $now;
}
=head3 C<elapsed>
Elapsed returns a L<Benchmark> object that represents the running time
of the aggregated tests. In order for C<elapsed> to be valid you must
call C<start> before running the tests and C<stop> immediately
afterwards.
=cut
# Benchmark difference between the recorded stop and start times. Croaks
# unless both start() and stop() have stored a timestamp.
sub elapsed {
    my ($self) = @_;
    require Carp;

    my ( $began, $ended ) = @{$self}{qw( start_time end_time )};
    if ( !( defined $began && defined $ended ) ) {
        Carp::croak(
            q{Can't call elapsed without first calling start and then stop});
    }

    return timediff( $ended, $began );
}
=head3 C<elapsed_timestr>
Returns a formatted string representing the runtime returned by
C<elapsed()>. This lets the caller not worry about Benchmark.
=cut
# The run time from elapsed(), rendered by Benchmark's timestr().
sub elapsed_timestr {
    my ($self) = @_;
    my $diff = $self->elapsed;
    return timestr($diff);
}
=head3 C<all_passed>
Return true if all the tests passed and no parse errors were detected.
=cut
# True only when at least one test ran, every test that ran passed, and
# has_errors reports nothing.
sub all_passed {
    my ($self) = @_;

    my $ran = $self->total;
    return $ran unless $ran;

    my $every_test_passed = ( $self->total == $self->passed );
    return $every_test_passed unless $every_test_passed;

    return !$self->has_errors;
}
=head3 C<get_status>
Get a single word describing the status of the aggregated tests.
Depending on the outcome of the tests returns 'PASS', 'FAIL' or
'NOTESTS'. This token is understood by L<CPAN::Reporter>.
=cut
# One-word verdict for the whole run: 'FAIL' if has_errors is true or the
# passed count differs from the total, 'PASS' if any tests ran, otherwise
# 'NOTESTS'.
sub get_status {
    my ($self) = @_;
    my $total  = $self->total;
    my $passed = $self->passed;

    if ( $self->has_errors || $total != $passed ) {
        return 'FAIL';
    }
    if ($total) {
        return 'PASS';
    }
    return 'NOTESTS';
}
##############################################################################
=head2 Summary methods
Each of the following methods will return the total number of corresponding
tests if called in scalar context. If called in list context, returns the
descriptions of the parsers which contain the corresponding tests (see C<add>
for an explanation of description).
=over 4
=item * failed
=item * parse_errors
=item * passed
=item * planned
=item * skipped
=item * todo
=item * todo_passed
=item * wait
=item * exit
=back
For example, to find out how many tests unexpectedly succeeded (TODO tests
which passed when they shouldn't):
my $count = $aggregate->todo_passed;
my @descriptions = $aggregate->todo_passed;
Note that C<wait> and C<exit> are the totals of the wait and exit
statuses of each of the tests. These values are totalled only to provide
a true value if any of them are non-zero.
=cut
##############################################################################
=head3 C<total>
my $tests_run = $aggregate->total;
Returns the total number of tests run.
=cut
# Running total stored under the 'total' key.
sub total {
    my ($self) = @_;
    return $self->{total};
}
##############################################################################
=head3 C<has_problems>
if ( $parser->has_problems ) {
...
}
Identical to C<has_errors>, but also returns true if any TODO tests
unexpectedly succeeded. This is more akin to "warnings".
=cut
# True if any TODO test unexpectedly passed or has_errors is true.
#
# The has_errors call is forced into scalar context: as the right operand
# of ||, it would otherwise inherit the caller's context, and a list-context
# caller could receive whatever list has_errors produces there (possibly an
# empty list) instead of a single true/false value.
sub has_problems {
    my $self = shift;
    return $self->todo_passed
      || scalar( $self->has_errors );
}
##############################################################################
=head3 C<has_errors>
if ( $parser->has_errors ) {
...
}
Returns true if I<any> of the parsers failed. This includes:
=over 4
=item * Failed tests
=item * Parse errors
=item * Bad exit or wait status
=back
=cut
# True if any aggregated parser had failed tests, parse errors, or a
# non-zero exit or wait status.
#
# The final operand is forced into scalar context. The operands to the left
# of each || are already evaluated as scalars, but the last one would
# otherwise run in the caller's context, where the generated accessors
# return a list of descriptions (possibly empty) rather than a count. That
# would turn `foo( $agg->has_errors, ... )` into a variable-length list.
sub has_errors {
    my $self = shift;
    return
         $self->failed
      || $self->parse_errors
      || $self->exit
      || scalar( $self->wait );
}
##############################################################################
=head3 C<todo_failed>
# deprecated in favor of 'todo_passed'. This method was horribly misnamed.
This was a badly misnamed method. It indicates which TODO tests unexpectedly
succeeded. Will now issue a warning and call C<todo_passed>.
=cut
# Deprecated alias: warn, then hand the call over to todo_passed with the
# original arguments and calling context intact.
sub todo_failed {
    my $notice
      = '"todo_failed" is deprecated. Please use "todo_passed". See the docs.';
    warn $notice;
    goto &todo_passed;
}
=head1 See Also
L<TAP::Parser>
L<TAP::Harness>
=cut
1;
package TAP::Parser::Iterator;
use strict;
use vars qw($VERSION @ISA);
use TAP::Object ();
@ISA = qw(TAP::Object);
=head1 NAME
TAP::Parser::Iterator - Internal base class for TAP::Parser Iterators
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
# see TAP::Parser::IteratorFactory for general usage
# to subclass:
use vars qw(@ISA);
use TAP::Parser::Iterator ();
@ISA = qw(TAP::Parser::Iterator);
sub _initialize {
# see TAP::Object...
}
=head1 DESCRIPTION
This is a simple iterator base class that defines L<TAP::Parser>'s iterator
API. See C<TAP::Parser::IteratorFactory> for the preferred way of creating
iterators.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
Create an iterator. Provided by L<TAP::Object>.
=head2 Instance Methods
=head3 C<next>
while ( my $item = $iter->next ) { ... }
Iterate through it, of course.
=head3 C<next_raw>
B<Note:> this method is abstract and should be overridden.
while ( my $item = $iter->next_raw ) { ... }
Iterate raw input without applying any fixes for quirky input syntax.
=cut
# Return the next line from next_raw, gluing a lone "not" line onto the
# line that follows it.
sub next {
    my ($self) = @_;

    my $line = $self->next_raw;
    return $line unless defined $line;

    # vms nit: When encountering 'not ok', vms often has the 'not' on a line
    # by itself:
    #   not
    #   ok 1 - 'I hate VMS'
    if ( $line =~ /^\s*not\s*$/ ) {
        my $continuation = $self->next_raw;
        $line .= $continuation ? $continuation : '';
    }

    return $line;
}
# Abstract: subclasses must override. Reaching this implementation reports
# the mistake, with a stack trace, through _croak.
sub next_raw {
    my ($self) = @_;
    require Carp;
    $self->_croak( Carp::longmess('abstract method called directly!') );
}
=head3 C<handle_unicode>
If necessary switch the input stream to handle unicode. This only has
any effect for I/O handle based streams.
The default implementation does nothing.
=cut
# Default: nothing to switch, nothing returned.
sub handle_unicode {
    return;
}
=head3 C<get_select_handles>
Return a list of filehandles that may be used upstream in a select()
call to signal that this Iterator is ready. Iterators that are not
handle-based should return an empty list.
The default implementation does nothing.
=cut
# Default: this iterator exposes no handles for select().
sub get_select_handles {
    my @no_handles = ();
    return @no_handles[ 0 .. -1 ];
}
=head3 C<wait>
B<Note:> this method is abstract and should be overridden.
my $wait_status = $iter->wait;
Return the C<wait> status for this iterator.
=head3 C<exit>
B<Note:> this method is abstract and should be overridden.
my $exit_status = $iter->exit;
Return the C<exit> status for this iterator.
=cut
# Abstract: subclasses must override. Reaching this implementation reports
# the mistake, with a stack trace, through _croak.
sub wait {
    my ($self) = @_;
    require Carp;
    $self->_croak( Carp::longmess('abstract method called directly!') );
}
# Abstract: subclasses must override. Reaching this implementation reports
# the mistake, with a stack trace, through _croak.
sub exit {
    my ($self) = @_;
    require Carp;
    $self->_croak( Carp::longmess('abstract method called directly!') );
}
1;
=head1 SUBCLASSING
Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
You must override the abstract methods as noted above.
=head2 Example
L<TAP::Parser::Iterator::Array> is probably the easiest example to follow.
There's not much point repeating it here.
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::IteratorFactory>,
L<TAP::Parser::Iterator::Array>,
L<TAP::Parser::Iterator::Stream>,
L<TAP::Parser::Iterator::Process>,
=cut
package TAP::Parser::IteratorFactory;
use strict;
use vars qw($VERSION @ISA);
use TAP::Object ();
use TAP::Parser::Iterator::Array ();
use TAP::Parser::Iterator::Stream ();
use TAP::Parser::Iterator::Process ();
@ISA = qw(TAP::Object);
=head1 NAME
TAP::Parser::IteratorFactory - Internal TAP::Parser Iterator
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::IteratorFactory;
my $factory = TAP::Parser::IteratorFactory->new;
my $iter = $factory->make_iterator(\*TEST);
my $iter = $factory->make_iterator(\@array);
my $iter = $factory->make_iterator(\%hash);
my $line = $iter->next;
=head1 DESCRIPTION
This is a factory class for simple iterator wrappers for arrays, filehandles,
and hashes. Unless you're subclassing, you probably won't need to use this
module directly.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
Creates a new factory class.
I<Note:> You currently don't need to instantiate a factory in order to use it.
=head3 C<make_iterator>
Create an iterator. The type of iterator created depends on the arguments to
the constructor:
my $iter = TAP::Parser::IteratorFactory->make_iterator( $filehandle );
Creates a I<stream> iterator (see L</make_stream_iterator>).
my $iter = TAP::Parser::IteratorFactory->make_iterator( $array_reference );
Creates an I<array> iterator (see L</make_array_iterator>).
my $iter = TAP::Parser::IteratorFactory->make_iterator( $hash_reference );
Creates a I<process> iterator (see L</make_process_iterator>).
=cut
# Build the iterator that suits $thing:
#   * a glob reference, or any object that is an IO::Handle (including
#     subclasses such as IO::File)  -> stream iterator
#   * an array reference            -> array iterator
#   * a hash reference              -> process iterator
# Anything else dies with "Can't iterate with a ...".
#
# $proto may be a class name or an object; the make_*_iterator method is
# dispatched on it so subclasses can override the concrete iterator class.
sub make_iterator {
    my ( $proto, $thing ) = @_;

    my $ref = ref $thing;

    # An exact string match on 'IO::Handle' would reject its subclasses,
    # so blessed handles are recognised by inheritance instead.
    require Scalar::Util;
    my $is_handle = $ref eq 'GLOB'
      || ( Scalar::Util::blessed($thing) && $thing->isa('IO::Handle') );

    if ($is_handle) {
        return $proto->make_stream_iterator($thing);
    }
    elsif ( $ref eq 'ARRAY' ) {
        return $proto->make_array_iterator($thing);
    }
    elsif ( $ref eq 'HASH' ) {
        return $proto->make_process_iterator($thing);
    }
    else {
        die "Can't iterate with a $ref";
    }
}
=head3 C<make_stream_iterator>
Make a new stream iterator and return it. Passes through any arguments given.
Defaults to a L<TAP::Parser::Iterator::Stream>.
=head3 C<make_array_iterator>
Make a new array iterator and return it. Passes through any arguments given.
Defaults to a L<TAP::Parser::Iterator::Array>.
=head3 C<make_process_iterator>
Make a new process iterator and return it. Passes through any arguments given.
Defaults to a L<TAP::Parser::Iterator::Process>.
=cut
# Construct a TAP::Parser::Iterator::Stream from the remaining arguments.
sub make_stream_iterator {
    my $factory = shift;    # class or object; not needed by the default
    return TAP::Parser::Iterator::Stream->new(@_);
}
# Construct a TAP::Parser::Iterator::Array from the remaining arguments.
sub make_array_iterator {
    my $factory = shift;    # class or object; not needed by the default
    return TAP::Parser::Iterator::Array->new(@_);
}
# Construct a TAP::Parser::Iterator::Process from the remaining arguments.
sub make_process_iterator {
    my $factory = shift;    # class or object; not needed by the default
    return TAP::Parser::Iterator::Process->new(@_);
}
1;
=head1 SUBCLASSING
Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
There are a few things to bear in mind when creating your own
C<IteratorFactory>:
=over 4
=item 1
The factory itself is never instantiated (this I<may> change in the future).
This means that C<_initialize> is never called.
=back
=head2 Example
package MyIteratorFactory;
use strict;
use vars '@ISA';
use MyStreamIterator;
use TAP::Parser::IteratorFactory;
@ISA = qw( TAP::Parser::IteratorFactory );
# override stream iterator
sub make_stream_iterator {
my $proto = shift;
MyStreamIterator->new(@_);
}
1;
=head1 ATTRIBUTION
Originally ripped off from L<Test::Harness>.
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Iterator>,
L<TAP::Parser::Iterator::Array>,
L<TAP::Parser::Iterator::Stream>,
L<TAP::Parser::Iterator::Process>,
=cut
package TAP::Parser::Grammar;
use strict;
use vars qw($VERSION @ISA);
use TAP::Object ();
use TAP::Parser::ResultFactory ();
use TAP::Parser::YAMLish::Reader ();
@ISA = qw(TAP::Object);
=head1 NAME
TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Grammar;
my $grammar = $self->make_grammar({
stream => $tap_parser_stream,
parser => $tap_parser,
version => 12,
});
my $result = $grammar->tokenize;
=head1 DESCRIPTION
C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
L<TAP::Parser::Result> subclasses to represent the tokens.
Do not attempt to use this class directly. It won't make sense. It's mainly
here to ensure that we will be able to have pluggable grammars when TAP is
expanded at some future date (plus, this stuff was really cluttering the
parser).
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $grammar = TAP::Parser::Grammar->new({
stream => $stream,
parser => $parser,
version => $version,
});
Returns L<TAP::Parser> grammar object that will parse the specified stream.
Both C<stream> and C<parser> are required arguments. If C<version> is not set
it defaults to C<12> (see L</set_version> for more details).
=cut
# new() implementation supplied by TAP::Object
# Remember the stream and parser handed in and select the TAP version to
# tokenize (12 unless $args->{version} is true). Returns $self.
sub _initialize {
    my ( $self, $args ) = @_;

    # TODO: accessors
    @{$self}{qw( stream parser )} = @{$args}{qw( stream parser )};

    my $version = $args->{version} || 12;
    $self->set_version($version);

    return $self;
}
# Token tables for each supported TAP version, keyed by version number.
# Every token type maps to a hash with two entries:
#   syntax  - the regex a line must match to be that kind of token
#   handler - a code ref, invoked as a method with the matched line, that
#             builds the token hash via one of the _make_*_token helpers
# The handlers read $1, $2, ... directly, so they rely on being called
# while the captures from the match against 'syntax' are still in effect
# (tokenize calls the handler straight after the match).
my %language_for;
{
# XXX the 'not' and 'ok' might be on separate lines in VMS ...
my $ok = qr/(?:not )?ok\b/;
my $num = qr/\d+/;
# TAP version 12 token table.
my %v12 = (
version => {
syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
handler => sub {
my ( $self, $line ) = @_;
my $version = $1;
return $self->_make_version_token( $line, $version, );
},
},
plan => {
syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
handler => sub {
my ( $self, $line ) = @_;
my ( $tests_planned, $tail ) = ( $1, $2 );
my $explanation = undef;
my $skip = '';
# A trailing "todo N N ..." list yields a TODO plan carrying those
# test numbers.
if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
my @todo = split /\s+/, _trim($1);
return $self->_make_plan_token(
$line, $tests_planned, 'TODO',
'', \@todo
);
}
# A plan of zero tests is always treated as a SKIP plan.
elsif ( 0 == $tests_planned ) {
$skip = 'SKIP';
# If we can't match # SKIP the directive should be undef.
($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i;
}
# Any other non-blank text after the count makes the line unknown.
elsif ( $tail !~ /^\s*$/ ) {
return $self->_make_unknown_token($line);
}
$explanation = '' unless defined $explanation;
return $self->_make_plan_token(
$line, $tests_planned, $skip,
$explanation, []
);
},
},
# An optimization to handle the most common test lines without
# directives.
simple_test => {
syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
handler => sub {
my ( $self, $line ) = @_;
my ( $ok, $num, $desc ) = ( $1, $2, $3 );
return $self->_make_test_token(
$line, $ok, $num,
$desc
);
},
},
test => {
syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
handler => sub {
my ( $self, $line ) = @_;
my ( $ok, $num, $desc ) = ( $1, $2, $3 );
my ( $dir, $explanation ) = ( '', '' );
# Split "description # SKIP|TODO explanation"; the description part
# may contain backslash-escaped characters (so an escaped '#' does
# not start the directive).
if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
\# \s* (SKIP|TODO) \b \s* (.*) $/ix
)
{
( $desc, $dir, $explanation ) = ( $1, $2, $3 );
}
return $self->_make_test_token(
$line, $ok, $num, $desc,
$dir, $explanation
);
},
},
comment => {
syntax => qr/^#(.*)/,
handler => sub {
my ( $self, $line ) = @_;
my $comment = $1;
return $self->_make_comment_token( $line, $comment );
},
},
bailout => {
syntax => qr/^Bail out!\s*(.*)/,
handler => sub {
my ( $self, $line ) = @_;
my $explanation = $1;
return $self->_make_bailout_token(
$line,
$explanation
);
},
},
);
# TAP version 13: everything from version 12, with a replacement 'plan'
# rule plus the additional 'yaml' and 'pragma' token types.
my %v13 = (
%v12,
plan => {
syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
handler => sub {
my ( $self, $line ) = @_;
my ( $tests_planned, $explanation ) = ( $1, $2 );
# SKIP applies when no tests are planned or a "# SKIP" part matched.
my $skip
= ( 0 == $tests_planned || defined $explanation )
? 'SKIP'
: '';
$explanation = '' unless defined $explanation;
return $self->_make_plan_token(
$line, $tests_planned, $skip,
$explanation, []
);
},
},
yaml => {
syntax => qr/^ (\s+) (---.*) $/x,
handler => sub {
my ( $self, $line ) = @_;
# $pad is the leading whitespace, $marker the "---..." opener.
my ( $pad, $marker ) = ( $1, $2 );
return $self->_make_yaml_token( $pad, $marker );
},
},
pragma => {
syntax =>
qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
handler => sub {
my ( $self, $line ) = @_;
my $pragmas = $1;
return $self->_make_pragma_token( $line, $pragmas );
},
},
);
# Per-version entry: 'tokens' is the token table; the optional 'setup'
# code ref is run as a method on the grammar by set_version.
%language_for = (
'12' => {
tokens => \%v12,
},
'13' => {
tokens => \%v13,
# Selecting version 13 asks the stream to handle unicode.
setup => sub {
shift->{stream}->handle_unicode;
},
},
);
}
##############################################################################
=head2 Instance Methods
=head3 C<set_version>
$grammar->set_version(13);
Tell the grammar which TAP syntax version to support. The lowest
supported version is 12. Although 'TAP version' isn't valid version 12
syntax it is accepted so that higher version numbers may be parsed.
=cut
# Switch the grammar to the token table for $version, run that version's
# setup hook if it has one, and rebuild the ordered token list. Croaks for
# a version with no entry in %language_for.
sub set_version {
    my ( $self, $version ) = @_;

    my $language = $language_for{$version};
    if ( !$language ) {
        require Carp;
        Carp::croak("Unsupported syntax version: $version");
    }

    $self->{version} = $version;
    $self->{tokens}  = $language->{tokens};

    my $setup = $language->{setup};
    $self->$setup() if $setup;

    return $self->_order_tokens;
}
# Optimization to put the most frequent tokens first.
# Optimization to put the most frequent tokens first: simple_test, test,
# comment and plan (those that exist) lead the list, followed by every
# other token in hash order. The list is stored in $self->{ordered_tokens}.
sub _order_tokens {
    my ($self) = @_;

    my %remaining = %{ $self->{tokens} };

    my @ordered;
    for my $type (qw( simple_test test comment plan )) {
        my $token = delete $remaining{$type};
        push @ordered, $token if defined $token;
    }
    push @ordered, values %remaining;

    return $self->{ordered_tokens} = \@ordered;
}
##############################################################################
=head3 C<tokenize>
my $token = $grammar->tokenize;
This method will return a L<TAP::Parser::Result> object representing the
current line of TAP.
=cut
# Read the next line from the stream and turn it into a result object.
# At end of input the grammar drops its parser reference and returns
# nothing. A line matching no token syntax becomes an 'unknown' token.
sub tokenize {
    my ($self) = @_;

    my $line = $self->{stream}->next;
    if ( !defined $line ) {
        delete $self->{parser};    # break circular ref
        return;
    }

    my $token;
  CANDIDATE:
    for my $candidate ( @{ $self->{ordered_tokens} } ) {
        next CANDIDATE unless $line =~ $candidate->{syntax};

        # The handler has to run right here, while the capture variables
        # set by the match above are still in scope.
        my $handler = $candidate->{handler};
        $token = $self->$handler($line);
        last CANDIDATE;
    }

    $token ||= $self->_make_unknown_token($line);

    return $self->{parser}->make_result($token);
}
##############################################################################
=head3 C<token_types>
my @types = $grammar->token_types;
Returns the different types of tokens which this grammar can parse.
=cut
# Names of the token types in the active token table.
sub token_types {
    my ($self) = @_;
    return keys %{ $self->{tokens} };
}
##############################################################################
=head3 C<syntax_for>
my $syntax = $grammar->syntax_for($token_type);
Returns a pre-compiled regular expression which will match a chunk of TAP
corresponding to the token type. For example (not that you should really pay
attention to this), C<< $grammar->syntax_for('comment') >> will return
C<< qr/^#(.*)/ >>.
=cut
# Return the compiled regex for token type $type, or undef if the active
# token table has no such type.
sub syntax_for {
    my ( $self, $type ) = @_;

    # Look the entry up first instead of chaining ->{$type}->{syntax}:
    # chaining would autovivify an empty entry for an unknown type in the
    # token table, which is the same hash reference handed out by
    # set_version, so the bogus type would then show up in token_types.
    my $token = $self->{tokens}{$type};
    return $token ? $token->{syntax} : undef;
}
##############################################################################
=head3 C<handler_for>
my $handler = $grammar->handler_for($token_type);
Returns a code reference which, when passed an appropriate line of TAP,
returns the lexed token corresponding to that line. As a result, the basic
TAP parsing loop looks similar to the following:
my @tokens;
my $grammar = TAP::Parser::Grammar->new;
LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
foreach my $type ( $grammar->token_types ) {
my $syntax = $grammar->syntax_for($type);
if ( $line =~ $syntax ) {
my $handler = $grammar->handler_for($type);
push @tokens => $grammar->$handler($line);
next LINE;
}
}
push @tokens => $grammar->_make_unknown_token($line);
}
=cut
# Return the handler code ref for token type $type, or undef if the active
# token table has no such type.
sub handler_for {
    my ( $self, $type ) = @_;

    # Look the entry up first instead of chaining ->{$type}->{handler}:
    # chaining would autovivify an empty entry for an unknown type in the
    # token table, which is the same hash reference handed out by
    # set_version, so the bogus type would then show up in token_types.
    my $token = $self->{tokens}{$type};
    return $token ? $token->{handler} : undef;
}
# Token hash for a "TAP version N" line.
sub _make_version_token {
    my ( $self, $line, $version ) = @_;

    my %token = (
        type    => 'version',
        raw     => $line,
        version => $version,
    );
    return \%token;
}
# Token hash for a plan line. Warns when a SKIP directive accompanies a
# non-zero test count and the grammar version is below 13.
sub _make_plan_token {
    my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;

    my $suspicious_skip
      = $directive eq 'SKIP'
      && 0 != $tests_planned
      && $self->{version} < 13;
    warn
      "Specified SKIP directive in plan but more than 0 tests ($line)\n"
      if $suspicious_skip;

    my %token = (
        type          => 'plan',
        raw           => $line,
        tests_planned => $tests_planned,
        directive     => $directive,
        explanation   => _trim($explanation),
        todo_list     => $todo,
    );
    return \%token;
}
# Token hash for a test line. The directive is upper-cased (empty when
# none was given); description and explanation are whitespace-trimmed.
sub _make_test_token {
    my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;

    my $directive = defined $dir ? uc $dir : '';

    my %token = (
        ok          => $ok,
        test_num    => $num,
        description => _trim($desc),
        directive   => $directive,
        explanation => _trim($explanation),
        raw         => $line,
        type        => 'test',
    );
    return \%token;
}
# Token hash for a line that matched no token syntax.
sub _make_unknown_token {
    my ( $self, $line ) = @_;

    my %token = (
        raw  => $line,
        type => 'unknown',
    );
    return \%token;
}
# Token hash for a comment line; the comment text is whitespace-trimmed.
sub _make_comment_token {
    my ( $self, $line, $comment ) = @_;

    my %token = (
        type    => 'comment',
        raw     => $line,
        comment => _trim($comment),
    );
    return \%token;
}
# Token hash for a "Bail out!" line; the explanation is whitespace-trimmed.
sub _make_bailout_token {
    my ( $self, $line, $explanation ) = @_;

    my %token = (
        type    => 'bailout',
        raw     => $line,
        bailout => _trim($explanation),
    );
    return \%token;
}
# Read a YAML block from the stream and return a 'yaml' token holding the
# parsed data and the reconstructed raw text.
#
# $pad is the whitespace that preceded the opening marker and $marker is
# the "---..." line itself. Following lines are pulled from the stream for
# as long as they start with at least that much whitespace, which is
# stripped before the line is handed to the YAMLish reader.
sub _make_yaml_token {
    my ( $self, $pad, $marker ) = @_;
    my $yaml   = TAP::Parser::YAMLish::Reader->new;
    my $stream = $self->{stream};

    # Construct a reader that reads from our input stripping leading
    # spaces from each line.
    my $leader = length($pad);
    my $strip  = qr{ ^ (\s{$leader}) (.*) $ }x;
    my @extra  = ($marker);
    my $reader = sub {
        return shift @extra if @extra;
        my $line = $stream->next;

        # The stream can run dry in the middle of a YAML block; signal
        # end of input rather than matching the regex against undef.
        return unless defined $line;
        return $2 if $line =~ $strip;
        return;
    };
    my $data = $yaml->read($reader);

    # Reconstitute input. This is convoluted. Maybe we should just
    # record it on the way in...
    chomp( my $raw = $yaml->get_raw );
    $raw =~ s/^/$pad/mg;
    return {
        type => 'yaml',
        raw  => $raw,
        data => $data
    };
}
# Token hash for a pragma line; the comma-separated pragma list is split
# into an array.
sub _make_pragma_token {
    my ( $self, $line, $pragmas ) = @_;

    my @pragma_list = split /\s*,\s*/, _trim($pragmas);

    my %token = (
        type    => 'pragma',
        raw     => $line,
        pragmas => \@pragma_list,
    );
    return \%token;
}
# Strip leading and trailing whitespace; undef becomes the empty string.
sub _trim {
    my ($text) = @_;
    return '' if !defined $text;

    for ($text) {
        s/^\s+//;
        s/\s+$//;
    }
    return $text;
}
1;
=head1 TAP GRAMMAR
B<NOTE:> This grammar is slightly out of date. There's still some discussion
about it and a new one will be provided when we have things better defined.
The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
stream-based protocol. In fact, it's quite legal to have an infinite stream.
For the same reason that we don't apply regexes to streams, we're not using a
formal grammar here. Instead, we parse the TAP in lines.
For purposes of forward compatibility, any result which does not match the
following grammar is currently referred to as
L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
A formal grammar would look similar to the following:
(*
For the time being, I'm cheating on the EBNF by allowing
certain terms to be defined by POSIX character classes by
using the following syntax:
digit ::= [:digit:]
As far as I am aware, that's not valid EBNF. Sue me. I
didn't know how to write "char" otherwise (Unicode issues).
Suggestions welcome.
*)
tap ::= version? { comment | unknown } leading_plan lines
|
lines trailing_plan {comment}
version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
leading_plan ::= plan skip_directive? "\n"
trailing_plan ::= plan "\n"
plan ::= '1..' nonNegativeInteger
lines ::= line {line}
line ::= (comment | test | unknown | bailout ) "\n"
test ::= status positiveInteger? description? directive?
status ::= 'not '? 'ok '
description ::= (character - (digit | '#')) {character - '#'}
directive ::= todo_directive | skip_directive
todo_directive ::= hash_mark 'TODO' ' ' {character}
skip_directive ::= hash_mark 'SKIP' ' ' {character}
comment ::= hash_mark {character}
hash_mark ::= '#' {' '}
bailout ::= 'Bail out!' {character}
unknown ::= { (character - "\n") }
(* POSIX character classes and other terminals *)
digit ::= [:digit:]
character ::= ([:print:] - "\n")
positiveInteger ::= ( digit - '0' ) {digit}
nonNegativeInteger ::= digit {digit}
=head1 SUBCLASSING
Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
do is read through the code. There's no easy way of summarizing it here.
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Iterator>,
L<TAP::Parser::Result>,
=cut
package TAP::Parser::Multiplexer;
use strict;
use vars qw($VERSION @ISA);
use IO::Select;
use TAP::Object ();
use constant IS_WIN32 => $^O =~ /^(MS)?Win32$/;
use constant IS_VMS => $^O eq 'VMS';
use constant SELECT_OK => !( IS_VMS || IS_WIN32 );
@ISA = 'TAP::Object';
=head1 NAME
TAP::Parser::Multiplexer - Multiplex multiple TAP::Parsers
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Multiplexer;
my $mux = TAP::Parser::Multiplexer->new;
$mux->add( $parser1, $stash1 );
$mux->add( $parser2, $stash2 );
while ( my ( $parser, $stash, $result ) = $mux->next ) {
# do stuff
}
=head1 DESCRIPTION
C<TAP::Parser::Multiplexer> gathers input from multiple TAP::Parsers.
Internally it calls select on the input file handles for those parsers
to wait for one or more of them to have input available.
See L<TAP::Harness> for an example of its use.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $mux = TAP::Parser::Multiplexer->new;
Returns a new C<TAP::Parser::Multiplexer> object.
=cut
# new() implementation supplied by TAP::Object
# Start with an empty IO::Select set, no non-selectable ("avid") parsers
# and a selectable-parser count of zero. Returns $self.
sub _initialize {
    my ($self) = @_;

    %$self = (
        %$self,
        select => IO::Select->new,
        avid   => [],                 # Parsers that can't select
        count  => 0,
    );

    return $self;
}
##############################################################################
=head2 Instance Methods
=head3 C<add>
$mux->add( $parser, $stash );
Add a TAP::Parser to the multiplexer. C<$stash> is an optional opaque
reference that will be returned from C<next> along with the parser and
the next result.
=cut
# Register $parser (and its optional $stash). When select() is usable on
# this platform and the parser offers handles, each handle goes into the
# IO::Select set; otherwise the parser joins the "avid" list that is
# polled directly.
sub add {
    my ( $self, $parser, $stash ) = @_;

    my @handles = SELECT_OK ? $parser->get_select_handles : ();

    if ( !@handles ) {
        return push @{ $self->{avid} }, [ $parser, $stash ];
    }

    # We have to turn handles into file numbers here because by
    # the time we want to remove them from our IO::Select they
    # will already have been closed by the iterator.
    my @filenos = map { fileno $_ } @handles;

    my $select = $self->{select};
    foreach my $handle (@handles) {
        $select->add( [ $handle, $parser, $stash, @filenos ] );
    }

    # One selectable parser, however many handles it contributed.
    return $self->{count}++;
}
=head3 C<parsers>
my $count = $mux->parsers;
Returns the number of parsers. Parsers are removed from the multiplexer
when their input is exhausted.
=cut
# Number of parsers still registered: selectable ones plus avid ones.
sub parsers {
    my ($self) = @_;
    my $selectable = $self->{count};
    my $avid       = @{ $self->{avid} };
    return $selectable + $avid;
}
# Build the closure behind next(). Each call returns ( $parser, $stash,
# $result ) for one parser, or nothing once no parsers remain. An
# undefined $result marks the end of that parser, which is then removed.
sub _iter {
    my ($self) = @_;

    my ( $sel, $avid ) = @{$self}{qw( select avid )};
    my @pending;    # select entries reported readable, not yet serviced

    return sub {

        # Drain all the non-selectable parsers first
        if (@$avid) {
            my ( $parser, $stash ) = @{ $avid->[0] };
            my $result = $parser->next;
            shift @$avid if !defined $result;
            return ( $parser, $stash, $result );
        }

        if ( !@pending ) {
            return if !$sel->count;
            @pending = $sel->can_read;
        }

        my $entry = shift @pending;
        my ( $h, $parser, $stash, @handles ) = @$entry;
        my $result = $parser->next;

        if ( !defined $result ) {
            $sel->remove(@handles);
            $self->{count}--;

            # Force another can_read - we may now have removed a handle
            # thought to have been ready.
            @pending = ();
        }

        return ( $parser, $stash, $result );
    };
}
=head3 C<next>
Return a result from the next available parser. Returns a list
containing the parser from which the result came, the stash that
corresponds with that parser and the result.
my ( $parser, $stash, $result ) = $mux->next;
If C<$result> is undefined the corresponding parser has reached the end
of its input (and will automatically be removed from the multiplexer).
When all parsers are exhausted an empty list will be returned.
if ( my ( $parser, $stash, $result ) = $mux->next ) {
if ( ! defined $result ) {
# End of this parser
}
else {
# Process result
}
}
else {
# All parsers finished
}
=cut
# Return the next ( $parser, $stash, $result ) triple, creating the
# underlying iterator closure on first use.
sub next {
    my ($self) = @_;
    $self->{_iter} = $self->_iter unless $self->{_iter};
    return $self->{_iter}->();
}
=head1 See Also
L<TAP::Parser>
L<TAP::Harness>
=cut
1;
package TAP::Parser::Result;
use strict;
use vars qw($VERSION @ISA);
use TAP::Object ();
@ISA = 'TAP::Object';
BEGIN {

    # Generate one is_<type> predicate per token type; each compares the
    # object's type() with the type baked into the predicate.
    no strict 'refs';
    foreach my $kind (
        qw( plan pragma test comment bailout version unknown yaml ))
    {
        *{"is_$kind"} = sub {
            my ($self) = @_;
            return $kind eq $self->type;
        };
    }
}
##############################################################################
=head1 NAME
TAP::Parser::Result - Base class for TAP::Parser output objects
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
# abstract class - not meant to be used directly
# see TAP::Parser::ResultFactory for preferred usage
# directly:
use TAP::Parser::Result;
my $token = {...};
my $result = TAP::Parser::Result->new( $token );
=head2 DESCRIPTION
This is a simple base class used by L<TAP::Parser> to store objects that
represent the current bit of test output data from TAP (usually a single
line). Unless you're subclassing, you probably won't need to use this module
directly.
=head2 METHODS
=head3 C<new>
# see TAP::Parser::ResultFactory for preferred usage
# to use directly:
my $result = TAP::Parser::Result->new($token);
Returns an instance of the appropriate class for the test token passed in.
=cut
# new() implementation provided by TAP::Object
# Shallow-copy the token's fields into the new result object. Returns
# $self; a false $token leaves the object untouched.
sub _initialize {
    my ( $self, $token ) = @_;

    if ($token) {

        # Copy field by field rather than replacing the whole hash, so
        # anything a subclass already stored on $self survives.
        for my $field ( keys %$token ) {
            $self->{$field} = $token->{$field};
        }
    }

    return $self;
}
##############################################################################
=head2 Boolean methods
The following methods all return a boolean value and are to be overridden in
the appropriate subclass.
=over 4
=item * C<is_plan>
Indicates whether or not this is the test plan line.
1..3
=item * C<is_pragma>
Indicates whether or not this is a pragma line.
pragma +strict
=item * C<is_test>
Indicates whether or not this is a test line.
ok 1 Is OK!
=item * C<is_comment>
Indicates whether or not this is a comment.
# this is a comment
=item * C<is_bailout>
Indicates whether or not this is a bailout line.
Bail out! We're out of dilithium crystals.
=item * C<is_version>
Indicates whether or not this is a TAP version line.
TAP version 4
=item * C<is_unknown>
Indicates whether or not the current line is of unknown type, i.e. it could
not be parsed.
... this line is junk ...
=item * C<is_yaml>
Indicates whether or not this is a YAML chunk.
=back
=cut
##############################################################################
=head3 C<raw>
print $result->raw;
Returns the original line of text which was parsed.
=cut
# Accessor for the 'raw' field: the original line of text that was parsed.
sub raw {
    my $self = shift;
    return $self->{raw};
}
##############################################################################
=head3 C<type>
my $type = $result->type;
Returns the "type" of a token, such as C<comment> or C<test>.
=cut
# Accessor for the 'type' field of the token (e.g. "comment" or "test").
sub type {
    my $self = shift;
    return $self->{type};
}
##############################################################################
=head3 C<as_string>
print $result->as_string;
Returns a string representation of the token. This might not be the exact
output, however. Tests will have test numbers added if not present, TODO and
SKIP directives will be capitalized and, in general, things will be cleaned
up. If you need the original text for the token, see the C<raw> method.
=cut
# Base-class string form: simply the stored 'raw' text. Subclasses may
# override this to return a cleaned-up representation.
sub as_string {
    my $self = shift;
    return $self->{raw};
}
##############################################################################
=head3 C<is_ok>
if ( $result->is_ok ) { ... }
Reports whether or not a given result has passed. Anything which is B<not> a
test result returns true. This is merely provided as a convenient shortcut.
=cut
# In the base class every result counts as passing; always returns 1.
sub is_ok {
    return 1;
}
##############################################################################
=head3 C<passed>
Deprecated. Please use C<is_ok> instead.
=cut
# Deprecated alias for is_ok: emits a deprecation warning on every call
# and then returns whatever is_ok returns.
sub passed {
    my $self = shift;
    warn 'passed() is deprecated. Please use "is_ok()"';
    return $self->is_ok;
}
##############################################################################
=head3 C<has_directive>
if ( $result->has_directive ) {
...
}
Indicates whether or not the given result has a TODO or SKIP directive.
=cut
# True when the result carries either directive: returns has_todo's value
# if that is true, otherwise whatever has_skip returns.
sub has_directive {
    my ($self) = @_;

    my $todo = $self->has_todo;
    return $todo if $todo;
    return $self->has_skip;
}
##############################################################################
=head3 C<has_todo>
if ( $result->has_todo ) {
...
}
Indicates whether or not the given result has a TODO directive.
=cut
# True when the stored directive is exactly 'TODO'. A missing or false
# directive is treated as the empty string.
sub has_todo {
    my $self      = shift;
    my $directive = $self->{directive} || '';
    return 'TODO' eq $directive;
}
##############################################################################
=head3 C<has_skip>
if ( $result->has_skip ) {
...
}
Indicates whether or not the given result has a SKIP directive.
=cut
# True when the stored directive is exactly 'SKIP'. A missing or false
# directive is treated as the empty string.
sub has_skip {
    my $self      = shift;
    my $directive = $self->{directive} || '';
    return 'SKIP' eq $directive;
}
=head3 C<set_directive>
Set the directive associated with this token. Used internally to fake
TODO tests.
=cut
# Store $dir as this token's directive and return the stored value.
sub set_directive {
    my $self = shift;
    my $dir  = shift;
    return $self->{directive} = $dir;
}
1;
=head1 SUBCLASSING
Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
Remember: if you want your subclass to be automatically used by the parser,
you'll have to register it with L<TAP::Parser::ResultFactory/register_type>.
If you're creating a completely new result I<type>, you'll probably need to
subclass L<TAP::Parser::Grammar> too, or else it'll never get used.
=head2 Example
package MyResult;
use strict;
use vars '@ISA';
@ISA = 'TAP::Parser::Result';
# register with the factory:
TAP::Parser::ResultFactory->register_type( 'my_type' => __PACKAGE__ );
sub as_string { 'My results all look the same' }
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::ResultFactory>,
L<TAP::Parser::Result::Bailout>,
L<TAP::Parser::Result::Comment>,
L<TAP::Parser::Result::Plan>,
L<TAP::Parser::Result::Pragma>,
L<TAP::Parser::Result::Test>,
L<TAP::Parser::Result::Unknown>,
L<TAP::Parser::Result::Version>,
L<TAP::Parser::Result::YAML>,
=cut
package TAP::Parser::ResultFactory;
use strict;
use vars qw($VERSION @ISA %CLASS_FOR);
use TAP::Object ();
use TAP::Parser::Result::Bailout ();
use TAP::Parser::Result::Comment ();
use TAP::Parser::Result::Plan ();
use TAP::Parser::Result::Pragma ();
use TAP::Parser::Result::Test ();
use TAP::Parser::Result::Unknown ();
use TAP::Parser::Result::Version ();
use TAP::Parser::Result::YAML ();
@ISA = 'TAP::Object';
##############################################################################
=head1 NAME
TAP::Parser::ResultFactory - Factory for creating TAP::Parser output objects
=head1 SYNOPSIS
use TAP::Parser::ResultFactory;
my $token = {...};
my $factory = TAP::Parser::ResultFactory->new;
my $result = $factory->make_result( $token );
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head2 DESCRIPTION
This is a simple factory class which returns a L<TAP::Parser::Result> subclass
representing the current bit of test data from TAP (usually a single line).
It is used primarily by L<TAP::Parser::Grammar>. Unless you're subclassing,
you probably won't need to use this module directly.
=head2 METHODS
=head2 Class Methods
=head3 C<new>
Creates a new factory class.
I<Note:> You currently don't need to instantiate a factory in order to use it.
=head3 C<make_result>
Returns an instance of the appropriate class for the test token passed in.
my $result = TAP::Parser::ResultFactory->make_result($token);
Can also be called as an instance method.
=cut
# Build a result object for $token: look up the class registered for the
# token's 'type' via class_for, then construct it with the token.
# Works with either a class name or an instance as invocant.
sub make_result {
    my ( $proto, $token ) = @_;

    my $type         = $token->{type};
    my $result_class = $proto->class_for($type);
    return $result_class->new($token);
}
=head3 C<class_for>
Takes one argument: C<$type>. Returns the class for this $type, or C<croak>s
with an error.
=head3 C<register_type>
Takes two arguments: C<$type>, C<$class>
This lets you override an existing type with your own custom type, or register
a completely new type, eg:
# create a custom result type:
package MyResult;
use strict;
use vars qw(@ISA);
@ISA = 'TAP::Parser::Result';
# register with the factory:
TAP::Parser::ResultFactory->register_type( 'my_type' => __PACKAGE__ );
# use it:
my $r = TAP::Parser::ResultFactory->make_result( { type => 'my_type' } );
Your custom type should then be picked up automatically by the L<TAP::Parser>.
=cut
BEGIN {

    # Default type => class registry. Each built-in token type maps to
    # the TAP::Parser::Result:: subclass of the same name; the key is the
    # lower-cased class suffix.
    %CLASS_FOR = map { ( lc($_), "TAP::Parser::Result::$_" ) }
      qw( Plan Pragma Test Comment Bailout Version Unknown YAML );
}
# Return the class registered in %CLASS_FOR for $type. Croaks (loading
# Carp on demand) when no class has been registered for that type.
sub class_for {
    my ( $class, $type ) = @_;

    unless ( exists $CLASS_FOR{$type} ) {
        require Carp;
        Carp::croak("Could not determine class for result type '$type'");
    }
    return $CLASS_FOR{$type};
}
# Register (or replace) the result class used for $type. No validation
# is performed on $rclass. Returns the invocant so calls can be chained.
sub register_type {
    my $class = shift;
    my ( $type, $rclass ) = @_;

    $CLASS_FOR{$type} = $rclass;
    return $class;
}
1;
=head1 SUBCLASSING
Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
There are a few things to bear in mind when creating your own
C<ResultFactory>:
=over 4
=item 1
The factory itself is never instantiated (this I<may> change in the future).
This means that C<_initialize> is never called.
=item 2
C<TAP::Parser::Result-E<gt>new> is never called, $tokens are reblessed.
This I<will> change in a future version!
=item 3
L<TAP::Parser::Result> subclasses will register themselves with
L<TAP::Parser::ResultFactory> directly:
package MyFooResult;
TAP::Parser::ResultFactory->register_type( foo => __PACKAGE__ );
Of course, it's up to you to decide whether or not to ignore them.
=back
=head2 Example
package MyResultFactory;
use strict;
use vars '@ISA';
use MyResult;
use TAP::Parser::ResultFactory;
@ISA = qw( TAP::Parser::ResultFactory );
# force all results to be 'MyResult'
sub class_for {
return 'MyResult';
}
1;
=head1 SEE ALSO
L<TAP::Parser>,
L<TAP::Parser::Result>,
L<TAP::Parser::Grammar>
=cut
package TAP::Parser::Scheduler;
use strict;
use vars qw($VERSION);
use Carp;
use TAP::Parser::Scheduler::Job;
use TAP::Parser::Scheduler::Spinner;
=head1 NAME
TAP::Parser::Scheduler - Schedule tests during parallel testing
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Scheduler;
=head1 DESCRIPTION
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $sched = TAP::Parser::Scheduler->new;
Returns a new C<TAP::Parser::Scheduler> object.
=cut
# Constructor. Takes key/value pairs:
#   tests - required; passed on to _set_rules
#   rules - optional; defaults to { par => '**' }
# Croaks on an odd number of arguments, a missing/false 'tests' value or
# any unrecognised key. Returns the new scheduler object.
sub new {
    my ( $class, @pairs ) = @_;

    croak "Need a number of key, value pairs" if @pairs % 2;
    my %args = @pairs;

    my $tests = delete $args{tests};
    croak "Need a 'tests' argument" unless $tests;

    my $rules = delete $args{rules};
    $rules = { par => '**' } unless $rules;

    if ( my @unknown = sort keys %args ) {
        croak "Unknown arg(s): ", join ', ', @unknown;
    }

    # Turn any simple names into a name, description pair. TODO: Maybe
    # construct jobs here?
    my $self = bless {}, $class;
    $self->_set_rules( $rules, $tests );
    return $self;
}
# Build the scheduler data structure.
#
# SCHEDULER-DATA ::= JOB
# || ARRAY OF ARRAY OF SCHEDULER-DATA
#
# The nested arrays are the key to scheduling. The outer array contains
# a list of things that may be executed in parallel. Whenever an
# eligible job is sought any element of the outer array that is ready to
# execute can be selected. The inner arrays represent sequential
# execution. They can only proceed when the first job is ready to run.
# Convert $tests into Job objects, arrange them according to $rules and
# store the result in $self->{schedule}. A plain test name is expanded to
# a ( name, name ) pair before being handed to the Job constructor.
sub _set_rules {
    my ( $self, $rules, $tests ) = @_;

    my @jobs;
    for my $spec (@$tests) {
        my $pair = 'ARRAY' eq ref $spec ? $spec : [ $spec, $spec ];
        push @jobs, TAP::Parser::Scheduler::Job->new(@$pair);
    }

    # _rule_clause removes from @jobs every job it places in the schedule.
    my $schedule = $self->_rule_clause( $rules, \@jobs );

    # Whatever no rule claimed is appended as a sequential block that runs
    # after the scheduled part.
    if (@jobs) {
        $schedule = [ [ $schedule, @jobs ] ];
    }
    return $self->{schedule} = $schedule;
}
# Translate one rule clause into scheduler data. $rule must be a hash
# with exactly one key, either 'par' or 'seq'; its value is a glob, a
# nested clause, or an array reference of those. Globs are resolved with
# _expand (which removes the matched jobs from @$tests), nested clauses
# recursively.
#
#   par => each expanded item becomes its own one-element sequence
#   seq => all expanded items form a single sequence
#
# Croaks on a malformed clause or an unknown clause type.
sub _rule_clause {
    my ( $self, $rule, $tests ) = @_;

    croak 'Rule clause must be a hash'
      unless 'HASH' eq ref $rule;

    my @types = keys %$rule;
    croak 'Rule clause must have exactly one key'
      unless @types == 1;

    my $type = $types[0];
    croak 'Unknown scheduler type: ', $type
      unless $type eq 'par' || $type eq 'seq';

    my $val   = $rule->{$type};
    my @items = 'ARRAY' eq ref $val ? @$val : ($val);

    my @expanded;
    for my $item (@items) {
        if ( 'HASH' eq ref $item ) {
            push @expanded, $self->_rule_clause( $item, $tests );
        }
        else {
            push @expanded, $self->_expand( $item, $tests );
        }
    }

    return [ map { [$_] } @expanded ] if $type eq 'par';
    return [ [@expanded] ];
}
# Translate a shell-style glob into the source text of a regular
# expression (a string, not a compiled qr//).
#
#   **       any run of characters, including '/'
#   *        any run of characters other than '/'
#   ?        exactly one character other than '/'
#   {a,b,c}  alternation; ',' and '}' are only special inside braces
#   \x       the character x, taken literally
#
# Everything else is matched literally. The glob is consumed left to
# right with \G-anchored matches; the loop ends once pos() reaches the
# end of the string.
#
# A backslash that is the last character of the glob has nothing to
# quote and is treated as a literal backslash; previously no branch
# consumed it and the loop never terminated.
#
# NOTE: an unclosed '{' produces an unterminated '(?:' group, so
# compiling the returned pattern will fail for such a glob.
sub _glob_to_regexp {
    my ( $self, $glob ) = @_;
    my $nesting = 0;     # depth of currently open {...} groups
    my $pattern = '';

    while (1) {
        if ( $glob =~ /\G\*\*/gc ) {

            # ** is any number of characters, including /, within a pathname
            $pattern .= '.*?';
        }
        elsif ( $glob =~ /\G\*/gc ) {

            # * is zero or more characters within a filename/directory name
            $pattern .= '[^/]*';
        }
        elsif ( $glob =~ /\G\?/gc ) {

            # ? is exactly one character within a filename/directory name
            $pattern .= '[^/]';
        }
        elsif ( $glob =~ /\G\{/gc ) {

            # {foo,bar,baz} is any of foo, bar or baz.
            $pattern .= '(?:';
            ++$nesting;
        }
        elsif ( $nesting and $glob =~ /\G,/gc ) {

            # , is only special inside {}
            $pattern .= '|';
        }
        elsif ( $nesting and $glob =~ /\G\}/gc ) {

            # } that matches { is special. But unbalanced } are not.
            $pattern .= ')';
            --$nesting;
        }
        elsif ( $glob =~ /\G(\\.)/gcs ) {

            # A quoted literal. /s lets the backslash quote a newline as
            # well, so that case cannot stall the loop either.
            $pattern .= $1;
        }
        elsif ( $glob =~ /\G\\\z/gc ) {

            # Trailing backslash with nothing after it: literal backslash.
            $pattern .= '\\\\';
        }
        elsif ( $glob =~ /\G([\},])/gc ) {

            # Sometimes meta characters
            $pattern .= '\\' . $1;
        }
        else {

            # Eat everything that is not a meta character.
            $glob =~ /\G([^{?*\\\},]*)/gc;
            $pattern .= quotemeta $1;
        }
        return $pattern if pos $glob == length $glob;
    }
}
# Resolve the glob $name against the jobs in @$tests. Every job whose
# filename matches the whole glob is removed from @$tests (the array is
# modified in place) and returned, in the original order.
sub _expand {
    my ( $self, $name, $tests ) = @_;

    my $glob_re = $self->_glob_to_regexp($name);
    my $matcher = qr/^ $glob_re $/x;

    my ( @match, @unmatched );
    for my $test (@$tests) {
        if ( $test->filename =~ $matcher ) {
            push @match, $test;
        }
        else {
            push @unmatched, $test;
        }
    }

    # Leave only the jobs that did not match in the caller's array.
    @$tests = @unmatched;
    return @match;
}
=head3 C<get_all>
Get a list of all remaining tests.
=cut
# Collect every job still present in the schedule, record how many there
# are in $self->{count} and return them (their number in scalar context).
sub get_all {
    my ($self) = @_;

    my @remaining = $self->_gather( $self->{schedule} );
    $self->{count} = scalar @remaining;
    return @remaining;
}
# Flatten scheduler data into a plain list of jobs. An undefined $rule
# yields nothing, a non-array $rule is a job and is returned as is, and an
# array is walked two levels at a time (array of sequences), skipping
# undefined slots and recursing into every defined entry.
sub _gather {
    my ( $self, $rule ) = @_;

    return unless defined $rule;
    return $rule unless 'ARRAY' eq ref $rule;

    my @jobs;
    for my $seq (@$rule) {
        for my $entry (@$seq) {
            next unless defined $entry;
            push @jobs, $self->_gather($entry);
        }
    }
    return @jobs;
}
=head3 C<get_job>
Return the next available job or C<undef> if none are available. Returns
a C<TAP::Parser::Scheduler::Spinner> if the scheduler still has pending
jobs but none are available to run right now.
=cut
# Hand out the next runnable job.
#   - a job, when one is ready (the pending count is decremented);
#   - a TAP::Parser::Scheduler::Spinner, when jobs are still pending but
#     none can start right now;
#   - nothing, when no jobs are pending.
# A false pending count is refreshed from get_all before searching.
sub get_job {
    my ($self) = @_;

    $self->{count} = $self->get_all unless $self->{count};

    if ( my @jobs = $self->_find_next_job( $self->{schedule} ) ) {
        $self->{count}--;
        return $jobs[0];
    }

    return unless $self->{count};
    return TAP::Parser::Scheduler::Spinner->new;
}
# Plain function (not a method). Returns 1 when $ar contains anything
# other than (possibly nested) empty arrays. Any non-array value,
# including undef, counts as content. Returns nothing otherwise.
sub _not_empty {
    my ($ar) = @_;

    return 1 if 'ARRAY' ne ref $ar;

    for my $element (@$ar) {
        return 1 if _not_empty($element);
    }
    return;
}
# Plain function: logical negation of _not_empty for its first argument.
sub _is_empty {
    my ($candidate) = @_;
    return !_not_empty($candidate);
}
# Search the scheduler data $rule (an array of sequences, each itself an
# array) for a job that can be started now, pruning exhausted entries
# from $rule as it goes.
#
# Returns the job found, or an empty list when nothing is ready. The
# data structure is modified in place.
sub _find_next_job {
my ( $self, $rule ) = @_;
# Sequences whose head is a nested schedule; searched only if no job is
# directly available at this level.
my @queue = ();
my $index = 0;
while ( $index < @$rule ) {
my $seq = $rule->[$index];
# Prune any exhausted items.
shift @$seq while @$seq && _is_empty( $seq->[0] );
if (@$seq) {
# An undefined head is skipped here: it is the placeholder written
# below when a job is handed out, so this sequence has nothing to offer
# until that placeholder is removed.
if ( defined $seq->[0] ) {
if ( 'ARRAY' eq ref $seq->[0] ) {
push @queue, $seq;
}
else {
# Hand out the head job, leaving undef in its slot so the rest of the
# sequence stays blocked. The on_finish callback shifts that
# placeholder off again (presumably invoked by the job when it
# completes - confirm against TAP::Parser::Scheduler::Job).
my $job = splice @$seq, 0, 1, undef;
$job->on_finish( sub { shift @$seq } );
return $job;
}
}
++$index;
}
else {
# Remove the empty sub-array from the array
# ($index is deliberately not advanced: the next element has moved
# into this position.)
splice @$rule, $index, 1;
}
}
# Nothing directly runnable at this level: descend into the nested
# schedules collected above, in order, and return the first hit.
for my $seq (@queue) {
if ( my @jobs = $self->_find_next_job( $seq->[0] ) ) {
return @jobs;
}
}
return;
}
=head3 C<as_string>
Return a human readable representation of the scheduling tree.
=cut
# Render the whole schedule as text by delegating to _as_string.
sub as_string {
    my ($self) = @_;

    my $schedule = $self->{schedule};
    return $self->_as_string($schedule);
}
# Recursively render scheduler data. Each nesting level is indented by
# two more spaces; array levels are labelled alternately 'par' (even
# depth) and 'seq' (odd depth). Undefined slots print as "(undef)", jobs
# as their quoted filename, and empty arrays produce no output.
sub _as_string {
    my $self  = shift;
    my $rule  = shift;
    my $depth = shift || 0;

    my $indent = ' ' x ( 2 * $depth );

    return "$indent(undef)\n" unless defined $rule;

    return "$indent'" . $rule->filename . "'\n"
      unless 'ARRAY' eq ref $rule;

    return unless @$rule;

    my @labels   = ( 'par', 'seq' );
    my $type     = $labels[ $depth % 2 ];
    my @children = map { $self->_as_string( $_, $depth + 1 ) } @$rule;
    return join( '', "$indent$type:\n", @children );
}
1;
package TAP::Parser::Source;
use strict;
use vars qw($VERSION @ISA);
use TAP::Object ();
use TAP::Parser::IteratorFactory ();
@ISA = qw(TAP::Object);
# Causes problem on MacOS and shouldn't be necessary anyway
#$SIG{CHLD} = sub { wait };
=head1 NAME
TAP::Parser::Source - Stream output from some source
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Source;
my $source = TAP::Parser::Source->new;
my $stream = $source->source(['/usr/bin/ruby', 'mytest.rb'])->get_stream;
=head1 DESCRIPTION
Takes a command and hopefully returns a stream from it.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $source = TAP::Parser::Source->new;
Returns a new C<TAP::Parser::Source> object.
=cut
# new() implementation supplied by TAP::Object
# Initialise a new source: start with an empty switch list and turn on
# autoflush for STDOUT and STDERR. $args is accepted but not used here.
# Returns $self.
sub _initialize {
    my ( $self, $args ) = @_;

    $self->{switches} = [];
    for my $handle ( \*STDOUT, \*STDERR ) {
        _autoflush($handle);
    }
    return $self;
}
##############################################################################
=head2 Instance Methods
=head3 C<source>
my $source = $source->source;
$source->source(['./some_prog some_test_file']);
# or
$source->source(['/usr/bin/ruby', 't/ruby_test.rb']);
Getter/setter for the source. The source should generally consist of an array
reference of strings which, when executed via L<&IPC::Open3::open3|IPC::Open3>,
should return a filehandle which returns successive rows of TAP. C<croaks> if
it doesn't get an arrayref.
=cut
# Combined getter/setter for the command to run.
#   no argument - returns the stored source
#   argument    - must be an array reference (otherwise _croak is called);
#                 it is stored and $self is returned for chaining
sub source {
    my ( $self, @args ) = @_;

    return $self->{source} if !@args;

    my $new_source = $args[0];
    if ( 'ARRAY' ne ref $new_source ) {
        $self->_croak('Argument to &source must be an array reference');
    }
    $self->{source} = $new_source;
    return $self;
}
##############################################################################
=head3 C<get_stream>
my $stream = $source->get_stream;
Returns a L<TAP::Parser::Iterator> stream of the output generated by executing
C<source>. C<croak>s if there was no command found.
Must be passed an object that implements a C<make_iterator> method.
Typically this is a TAP::Parser instance.
=cut
# Ask $factory (anything with a make_iterator method) for an iterator
# over the output of the configured command. The factory receives a hash
# reference with the command list and the merge flag. Calls _croak when
# no command has been configured.
sub get_stream {
    my ( $self, $factory ) = @_;

    my @command = $self->_get_command;
    $self->_croak('No command found!') unless @command;

    my %iterator_args = (
        command => \@command,
        merge   => $self->merge,
    );
    return $factory->make_iterator( \%iterator_args );
}
# Return the stored source as a flat list (empty when no source is set).
sub _get_command {
    my $self   = shift;
    my $source = $self->source || [];
    return @$source;
}
##############################################################################
=head3 C<merge>
my $merge = $source->merge;
Sets or returns the flag that dictates whether STDOUT and STDERR are merged.
=cut
# Combined getter/setter for the merge flag. Without an argument the
# current value is returned; with one, it is stored and $self is returned
# so calls can be chained.
sub merge {
    my ( $self, @args ) = @_;

    if (@args) {
        $self->{merge} = $args[0];
        return $self;
    }
    return $self->{merge};
}
# Turns on autoflush for the handle passed
# Plain function: enable autoflush ($|) on the given handle. The handle
# is selected only for the duration of the change; the previously
# selected handle is restored before returning.
sub _autoflush {
    my ($handle) = @_;

    my $previous = select $handle;
    $| = 1;
    return select $previous;
}
1;
=head1 SUBCLASSING
Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
=head2 Example
package MyRubySource;
use strict;
use vars '@ISA';
use Carp qw( croak );
use TAP::Parser::Source;
@ISA = qw( TAP::Parser::Source );
# expect $source->source(['mytest.rb', 'cmdline', 'args']);
sub source {
my ($self, $args) = @_;
my ($rb_file) = @$args;
croak("error: Ruby file '$rb_file' not found!") unless (-f $rb_file);
return $self->SUPER::source(['/usr/bin/ruby', @$args]);
}
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Source::Perl>,
=cut
package TAP::Parser::Utils;
use strict;
use Exporter;
use vars qw($VERSION @ISA @EXPORT_OK);
@ISA = qw( Exporter );
@EXPORT_OK = qw( split_shell );
=head1 NAME
TAP::Parser::Utils - Internal TAP::Parser utilities
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Utils qw( split_shell );
my @switches = split_shell( $arg );
=head1 DESCRIPTION
B<FOR INTERNAL USE ONLY!>
=head2 INTERFACE
=head3 C<split_shell>
Shell style argument parsing. Handles backslash escaping, single and
double quoted strings but not shell substitutions.
Pass one or more strings containing shell escaped arguments. The return
value is an array of arguments parsed from the input strings according
to (approximate) shell parsing rules. It's legal to pass C<undef> in
which case an empty array will be returned. That makes it possible to
my @args = split_shell( $ENV{SOME_ENV_VAR} );
without worrying about whether the environment variable exists.
This is used to split HARNESS_PERL_ARGS into individual switches.
=cut
# Split each defined, non-empty argument into shell-style words and
# return the combined list. A word is a run of unquoted non-space
# characters, backslash-escaped characters and single- or double-quoted
# strings. After splitting, quote characters are removed and each
# backslash escape is replaced by the escaped character.
sub split_shell {
    my @words;

    for my $arg (@_) {
        next unless defined $arg && length $arg;

        while (
            $arg =~ /
            (
              (?: [^\\"'\s]+
                | \\.
                | " (?: \\. | [^"] )* "
                | ' (?: \\. | [^'] )* '
              )+
            ) /xg
          )
        {
            push @words, $1;
        }
    }

    # Unquote: drop quote characters, keep the character after a backslash.
    for my $word (@words) {
        $word =~ s/ \\(.) | ['"] /defined $1 ? $1 : ''/exg;
    }

    return @words;
}
1;
package TAP::Parser::Iterator::Array;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Iterator ();
@ISA = 'TAP::Parser::Iterator';
=head1 NAME
TAP::Parser::Iterator::Array - Internal TAP::Parser array Iterator
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
# see TAP::Parser::IteratorFactory for preferred usage
# to use directly:
use TAP::Parser::Iterator::Array;
my @data = ('foo', 'bar', 'baz');
my $it = TAP::Parser::Iterator::Array->new(\@data);
my $line = $it->next;
=head1 DESCRIPTION
This is a simple iterator wrapper for arrays of scalar content, used by
L<TAP::Parser>. Unless you're subclassing, you probably won't need to use
this module directly.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
Create an iterator. Takes one argument: an C<$array_ref>
=head2 Instance Methods
=head3 C<next>
Iterate through it, of course.
=head3 C<next_raw>
Iterate raw input without applying any fixes for quirky input syntax.
=head3 C<wait>
Get the wait status for this iterator. For an array iterator this will always
be zero.
=head3 C<exit>
Get the exit status for this iterator. For an array iterator this will always
be zero.
=cut
# new() implementation supplied by TAP::Object
# Set up iteration over the array referenced by $thing. The caller's
# array is used directly (not copied) and its elements are chomped in
# place. Returns $self.
sub _initialize {
    my ( $self, $thing ) = @_;

    chomp for @$thing;
    @{$self}{qw( idx array exit )} = ( 0, $thing, undef );
    return $self;
}
# The wait status of an array iterator is the same as its exit status.
sub wait {
    my $self = shift;
    return $self->exit;
}
# Exit status: 0 once every element has been consumed, nothing (undef in
# scalar context) while elements remain.
sub exit {
    my ($self) = @_;

    return if $self->{idx} < @{ $self->{array} };
    return 0;
}
# Return the element at the current index and advance the index by one.
# Past the end of the array this yields undef.
sub next_raw {
    my ($self) = @_;

    my $position = $self->{idx}++;
    return $self->{array}->[$position];
}
1;
=head1 ATTRIBUTION
Originally ripped off from L<Test::Harness>.
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Iterator>,
L<TAP::Parser::IteratorFactory>,
=cut
package TAP::Parser::Iterator::Process;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Iterator ();
use Config;
use IO::Handle;
@ISA = 'TAP::Parser::Iterator';
# True when $^O is exactly "Win32" or "MSWin32". Used below to pick the
# Windows-specific process spawning and exit status handling.
my $IS_WIN32 = ( $^O =~ /^(MS)?Win32$/ );
=head1 NAME
TAP::Parser::Iterator::Process - Internal TAP::Parser Iterator
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
# see TAP::Parser::IteratorFactory for preferred usage
# to use directly:
use TAP::Parser::Iterator::Process;
my %args = (
command => ['python', 'setup.py', 'test'],
merge => 1,
setup => sub { ... },
teardown => sub { ... },
);
my $it = TAP::Parser::Iterator::Process->new(\%args);
my $line = $it->next;
=head1 DESCRIPTION
This is a simple iterator wrapper for executing external processes, used by
L<TAP::Parser>. Unless you're subclassing, you probably won't need to use
this module directly.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
Create an iterator. Expects one argument containing a hashref of the form:
command => \@command_to_execute
merge => $attempt_merge_stderr_and_stdout?
setup => $callback_to_setup_command
teardown => $callback_to_teardown_command
Tries to use L<IPC::Open3> & L<IO::Select> to communicate with the spawned
process if they are available. Falls back onto C<open()>.
=head2 Instance Methods
=head3 C<next>
Iterate through the process output, of course.
=head3 C<next_raw>
Iterate raw input without applying any fixes for quirky input syntax.
=head3 C<wait>
Get the wait status for this iterator's process.
=head3 C<exit>
Get the exit status for this iterator's process.
=cut
# Install _wait2exit, which converts a wait status (second argument, as
# the sub is called as a method) into an exit code. POSIX::WEXITSTATUS is
# probed once at load time; if POSIX cannot be loaded or the call fails,
# fall back to taking the high byte of the status.
eval { require POSIX; &POSIX::WEXITSTATUS(0) };
if ( !$@ ) {
    *_wait2exit = sub { POSIX::WEXITSTATUS( $_[1] ) }
}
else {
    *_wait2exit = sub { $_[1] >> 8 };
}
# Decide whether IPC::Open3 can be used. Returns nothing when the
# platform has no fork and is not Windows, or when IPC::Open3 or
# IO::Select fails to load; returns 1 otherwise. The modules are loaded
# with a string "use" so their default exports are imported into this
# package.
sub _use_open3 {
    my $self = shift;

    my $can_spawn = $Config{d_fork} || $IS_WIN32;
    return if !$can_spawn;

    foreach my $module ( 'IPC::Open3', 'IO::Select' ) {
        eval "use $module";
        return if $@;
    }
    return 1;
}
{

    # Cached outcome of trying to load Encode: undef until the first
    # call, then 1 (loaded) or 0 (failed).
    my $encode_loaded;

    # Plain function. Try once to load Encode and import decode_utf8 into
    # this package; return 1 on success, 0 on failure. Later calls return
    # the cached answer.
    sub _get_unicode {
        unless ( defined $encode_loaded ) {
            eval 'use Encode qw(decode_utf8);';
            $encode_loaded = $@ ? 0 : 1;
        }
        return $encode_loaded;
    }
}
# new() implementation supplied by TAP::Object
# Start the command described by $args and record the handles needed to
# read its output.
#
# $args is a hash reference; the keys consumed (deleted) here are:
#   command     - array ref with the command and its arguments (required)
#   merge       - true to merge the child's STDERR into STDOUT
#   setup       - optional callback, called with the command before launch
#   teardown    - optional callback, stored so it can be called with the
#                 command later
#   _chunk_size - private; read size used by the select-based reader
#                 (defaults to 65536)
#
# Dies if no command is given or the command cannot be started.
# Returns $self.
sub _initialize {
my ( $self, $args ) = @_;
my @command = @{ delete $args->{command} || [] }
or die "Must supply a command to execute";
# Private. Used to frig with chunk size during testing.
my $chunk_size = delete $args->{_chunk_size} || 65536;
my $merge = delete $args->{merge};
my ( $pid, $err, $sel );
if ( my $setup = delete $args->{setup} ) {
$setup->(@command);
}
my $out = IO::Handle->new;
if ( $self->_use_open3 ) {
# HOTPATCH {{{
# For the rest of this block IPC::Open3::xclose is replaced by a
# wrapper that does nothing when asked to close a handle sharing
# STDIN's file descriptor and otherwise calls the original.
my $xclose = \&IPC::Open3::xclose;
local $^W; # no warnings
local *IPC::Open3::xclose = sub {
my $fh = shift;
no strict 'refs';
return if ( fileno($fh) == fileno(STDIN) );
$xclose->($fh);
};
# }}}
if ($IS_WIN32) {
# On Windows no IO::Select is created: the child's STDERR is either
# merged (empty error handle; per IPC::Open3 a false error handle
# sends STDERR to the same handle as STDOUT) or passed straight
# through to our own STDERR.
$err = $merge ? '' : '>&STDERR';
eval {
$pid = open3(
'<&STDIN', $out, $merge ? '' : $err,
@command
);
};
die "Could not execute (@command): $@" if $@;
if ( $] >= 5.006 ) {
# Kludge to avoid warning under 5.5
eval 'binmode($out, ":crlf")';
}
}
else {
# Elsewhere STDERR gets its own handle unless merging, and an
# IO::Select over both handles is created for the reader.
$err = $merge ? '' : IO::Handle->new;
eval { $pid = open3( '<&STDIN', $out, $err, @command ); };
die "Could not execute (@command): $@" if $@;
$sel = $merge ? undef : IO::Select->new( $out, $err );
}
}
else {
# Fallback without IPC::Open3: run the command through a piped open().
# Arguments containing whitespace are wrapped in double quotes; no
# other quoting or escaping is applied. No pid is recorded here.
$err = '';
my $command
= join( ' ', map { $_ =~ /\s/ ? qq{"$_"} : $_ } @command );
open( $out, "$command|" )
or die "Could not execute ($command): $!";
}
$self->{out} = $out;
$self->{err} = $err;
$self->{sel} = $sel;
$self->{pid} = $pid;
$self->{exit} = undef;
$self->{chunk_size} = $chunk_size;
if ( my $teardown = delete $args->{teardown} ) {
# Wrap the callback so it is later invoked with the original command.
$self->{teardown} = sub {
$teardown->(@command);
};
}
return $self;
}
=head3 C<handle_unicode>
Upgrade the input stream to handle UTF8.
=cut
# Arrange for lines read from the process to be treated as UTF-8.
#
# With an IO::Select reader (data arrives via sysread), the line
# iterator is wrapped so every defined line is passed through
# decode_utf8; nothing is done if Encode could not be loaded. Without
# one, a :utf8 layer is pushed onto the output handle on perl 5.8 or
# later.
sub handle_unicode {
my $self = shift;
if ( $self->{sel} ) {
if ( _get_unicode() ) {
# Make sure our iterator has been constructed and...
my $next = $self->{_next} ||= $self->_next;
# ...wrap it to do UTF8 casting
$self->{_next} = sub {
my $line = $next->();
return decode_utf8($line) if defined $line;
return;
};
}
}
else {
if ( $] >= 5.008 ) {
# String eval, so that perls older than 5.8 never compile this call.
eval 'binmode($self->{out}, ":utf8")';
}
}
}
##############################################################################
# Accessor for the stored wait status (set by _finish).
sub wait {
    my $self = shift;
    return $self->{wait};
}
# Accessor for the stored exit status (undef until _finish has run).
sub exit {
    my $self = shift;
    return $self->{exit};
}
# Build and return the closure that yields one line of process output per
# call, and nothing once the output is exhausted (after calling _finish).
# Which of three closures is returned depends on the handles stored by
# _initialize:
#   - output handle and IO::Select: chunked sysread over STDOUT/STDERR
#   - output handle only:           plain line-by-line reads
#   - no output handle:             immediately finished
sub _next {
my $self = shift;
if ( my $out = $self->{out} ) {
if ( my $sel = $self->{sel} ) {
my $err = $self->{err};
# Complete lines already split off but not yet returned.
my @buf = ();
my $partial = ''; # Partial line
my $chunk_size = $self->{chunk_size};
return sub {
return shift @buf if @buf;
READ:
while ( my @ready = $sel->can_read ) {
for my $fh (@ready) {
my $got = sysread $fh, my ($chunk), $chunk_size;
# Zero bytes means end of file on this handle; an undef result
# (read error) also compares equal to 0 and is handled the same way.
if ( $got == 0 ) {
$sel->remove($fh);
}
elsif ( $fh == $err ) {
print STDERR $chunk; # echo STDERR
}
else {
# Prepend whatever was left over from the previous read.
$chunk = $partial . $chunk;
$partial = '';
# Make sure we have a complete line
unless ( substr( $chunk, -1, 1 ) eq "\n" ) {
my $nl = rindex $chunk, "\n";
if ( $nl == -1 ) {
# No newline at all yet: keep everything and restart the READ
# loop body to fetch more data.
$partial = $chunk;
redo READ;
}
else {
# Keep the text after the last newline for the next read.
$partial = substr( $chunk, $nl + 1 );
$chunk = substr( $chunk, 0, $nl );
}
}
push @buf, split /\n/, $chunk;
return shift @buf if @buf;
}
}
}
# Return partial last line
if ( length $partial ) {
my $last = $partial;
$partial = '';
return $last;
}
$self->_finish;
return;
};
}
else {
return sub {
if ( defined( my $line = <$out> ) ) {
chomp $line;
return $line;
}
$self->_finish;
return;
};
}
}
else {
return sub {
$self->_finish;
return;
};
}
}
# Return the next raw line of process output. The reader closure is
# created by _next on the first call and cached in $self->{_next}.
sub next_raw {
    my ($self) = @_;

    my $reader = $self->{_next} ||= $self->_next;
    return $reader->();
}
# Called when the process output is exhausted: reap the child, close the
# handles, record the wait and exit statuses and run the teardown
# callback if one was supplied. Returns $self.
sub _finish {
my $self = shift;
# Start from the current $? in case no better status is obtained below.
my $status = $?;
# Avoid circular refs
# (the reader closure captures $self; replace it with a stub that
# returns nothing)
$self->{_next} = sub {return}
if $] >= 5.006;
# If we have a subprocess we need to wait for it to terminate
if ( defined $self->{pid} ) {
if ( $self->{pid} == waitpid( $self->{pid}, 0 ) ) {
$status = $?;
}
}
( delete $self->{out} )->close if $self->{out};
# If we have an IO::Select we also have an error handle to close.
if ( $self->{sel} ) {
( delete $self->{err} )->close;
delete $self->{sel};
}
else {
# No IO::Select: take $? again after the output handle was closed
# (presumably because closing a piped open() handle sets $? - confirm).
$status = $?;
}
# Sometimes we get -1 on Windows. Presumably that means status not
# available.
$status = 0 if $IS_WIN32 && $status == -1;
$self->{wait} = $status;
# Called as a method, so _wait2exit sees the status as its second
# argument.
$self->{exit} = $self->_wait2exit($status);
if ( my $teardown = $self->{teardown} ) {
$teardown->();
}
return $self;
}
=head3 C<get_select_handles>
Return a list of filehandles that may be used upstream in a select()
call to signal that this Iterator is ready. Iterators that are not
handle based should return an empty list.
=cut
# Return the output and error handles, in that order, leaving out any
# that is false (unset, empty string, or already removed).
sub get_select_handles {
    my ($self) = @_;

    my @handles;
    for my $slot (qw( out err )) {
        my $handle = $self->{$slot};
        push @handles, $handle if $handle;
    }
    return @handles;
}
1;
=head1 ATTRIBUTION
Originally ripped off from L<Test::Harness>.
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Iterator>,
L<TAP::Parser::IteratorFactory>,
=cut
package TAP::Parser::Iterator::Stream;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Iterator ();
@ISA = 'TAP::Parser::Iterator';
=head1 NAME
TAP::Parser::Iterator::Stream - Internal TAP::Parser Iterator
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
# see TAP::Parser::IteratorFactory for preferred usage
# to use directly:
use TAP::Parser::Iterator::Stream;
open( TEST, 'test.tap' );
my $it = TAP::Parser::Iterator::Stream->new(\*TEST);
my $line = $it->next;
=head1 DESCRIPTION
This is a simple iterator wrapper for reading from filehandles, used by
L<TAP::Parser>. Unless you're subclassing, you probably won't need to use
this module directly.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
Create an iterator. Expects one argument containing a filehandle.
=cut
# new() implementation supplied by TAP::Object
# Remember the filehandle to read from in $self->{fh}. Returns $self.
sub _initialize {
    my $self = shift;
    $self->{fh} = shift;
    return $self;
}
=head2 Instance Methods
=head3 C<next>
Iterate through it, of course.
=head3 C<next_raw>
Iterate raw input without applying any fixes for quirky input syntax.
=head3 C<wait>
Get the wait status for this iterator. Returns zero once the stream has been
exhausted and nothing before that.
=head3 C<exit>
Get the exit status for this iterator. Returns zero once the stream has been
exhausted and nothing before that.
=cut
# The wait status of a stream iterator is the same as its exit status.
sub wait {
    my $self = shift;
    return $self->exit;
}
# Exit status: nothing while a filehandle is still held, 0 once it has
# been released.
sub exit {
    my $self = shift;
    return if $self->{fh};
    return 0;
}
# Return the next line from the filehandle with its line ending removed.
# At end of input the handle is closed and released via _finish and
# nothing is returned. Calling next_raw again after that is safe and
# keeps returning nothing; previously it tried to read from the handle
# that had already been deleted.
sub next_raw {
    my $self = shift;
    my $fh   = $self->{fh};

    # Already exhausted: _finish has released the handle.
    return unless defined $fh;

    if ( defined( my $line = <$fh> ) ) {
        chomp $line;
        return $line;
    }

    $self->_finish;
    return;
}

# Close and forget the filehandle. Returns the result of close, or
# nothing when there is no handle left to close (so a repeated call is
# harmless).
sub _finish {
    my $self = shift;

    my $fh = delete $self->{fh};
    return unless defined $fh;
    return close $fh;
}
1;
=head1 ATTRIBUTION
Originally ripped off from L<Test::Harness>.
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Iterator>,
L<TAP::Parser::IteratorFactory>,
=cut
package TAP::Parser::Result::Bailout;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Result;
@ISA = 'TAP::Parser::Result';
=head1 NAME
TAP::Parser::Result::Bailout - Bailout result token.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This is a subclass of L<TAP::Parser::Result>. A token of this class will be
returned if a bail out line is encountered.
1..5
ok 1 - woo hooo!
Bail out! Well, so much for "woo hooo!"
=head1 OVERRIDDEN METHODS
Mainly listed here to shut up the pitiful screams of the pod coverage tests.
They keep me awake at night.
=over 4
=item * C<as_string>
=back
=cut
##############################################################################
=head2 Instance Methods
=head3 C<explanation>
if ( $result->is_bailout ) {
my $explanation = $result->explanation;
print "We bailed out because ($explanation)";
}
If, and only if, a token is a bailout token, you can get an "explanation" via
this method. The explanation is the text after the mystical "Bail out!" words
which appear in the tap output.
=cut
# Accessor for the 'bailout' field: the text following "Bail out!".
sub explanation {
    my $self = shift;
    return $self->{bailout};
}
# String form of a bailout token: the stored 'bailout' text.
sub as_string {
    my $self = shift;
    return $self->{bailout};
}
1;
package TAP::Parser::Result::Comment;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Result;
@ISA = 'TAP::Parser::Result';
=head1 NAME
TAP::Parser::Result::Comment - Comment result token.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This is a subclass of L<TAP::Parser::Result>. A token of this class will be
returned if a comment line is encountered.
1..1
ok 1 - woo hooo!
# this is a comment
=head1 OVERRIDDEN METHODS
Mainly listed here to shut up the pitiful screams of the pod coverage tests.
They keep me awake at night.
=over 4
=item * C<as_string>
Note that this method merely returns the comment preceded by a '# '.
=back
=cut
##############################################################################
=head2 Instance Methods
=head3 C<comment>
if ( $result->is_comment ) {
my $comment = $result->comment;
print "I have something to say: $comment";
}
=cut
# Accessor for the 'comment' field of the token.
sub comment {
    my $self = shift;
    return $self->{comment};
}
# String form of a comment token: the stored 'raw' line.
sub as_string {
    my $self = shift;
    return $self->{raw};
}
1;
package TAP::Parser::Result::Plan;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Result;
@ISA = 'TAP::Parser::Result';
=head1 NAME
TAP::Parser::Result::Plan - Plan result token.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This is a subclass of L<TAP::Parser::Result>. A token of this class will be
returned if a plan line is encountered.
1..1
ok 1 - woo hooo!
C<1..1> is the plan. Gotta have a plan.
=head1 OVERRIDDEN METHODS
Mainly listed here to shut up the pitiful screams of the pod coverage tests.
They keep me awake at night.
=over 4
=item * C<as_string>
=item * C<raw>
=back
=cut
##############################################################################
=head2 Instance Methods
=head3 C<plan>
if ( $result->is_plan ) {
print $result->plan;
}
This is merely a synonym for C<as_string>.
=cut
# Rebuild the plan line as "1..N" from the stored number of planned tests.
sub plan {
    my $self = shift;
    return '1..' . $self->{tests_planned};
}
##############################################################################
=head3 C<tests_planned>
my $planned = $result->tests_planned;
Returns the number of tests planned. For example, a plan of C<1..17> will
cause this method to return '17'.
=cut
# Accessor for the 'tests_planned' field.
sub tests_planned {
    my $self = shift;
    return $self->{tests_planned};
}
##############################################################################
=head3 C<directive>
my $directive = $plan->directive;
If a SKIP directive is included with the plan, this method will return it.
1..0 # SKIP: why bother?
=cut
# Accessor for the 'directive' field of the plan token.
sub directive {
    my $self = shift;
    return $self->{directive};
}
##############################################################################
=head3 C<has_skip>
if ( $result->has_skip ) { ... }
Returns a boolean value indicating whether or not this test has a SKIP
directive.
=head3 C<explanation>
my $explanation = $plan->explanation;
If a SKIP directive was included with the plan, this method will return the
explanation, if any.
=cut
# Accessor for the 'explanation' field of the plan token.
sub explanation {
    my $self = shift;
    return $self->{explanation};
}
=head3 C<todo_list>
my $todo = $result->todo_list;
for ( @$todo ) {
...
}
=cut
# Accessor for the 'todo_list' field; the stored value is returned as is.
sub todo_list {
    my $self = shift;
    return $self->{todo_list};
}
1;
package TAP::Parser::Result::Pragma;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Result;
@ISA = 'TAP::Parser::Result';
=head1 NAME
TAP::Parser::Result::Pragma - TAP pragma token.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This is a subclass of L<TAP::Parser::Result>. A token of this class will be
returned if a pragma is encountered.
TAP version 13
pragma +strict, -foo
Pragmas are only supported from TAP version 13 onwards.
=head1 OVERRIDDEN METHODS
Mainly listed here to shut up the pitiful screams of the pod coverage tests.
They keep me awake at night.
=over 4
=item * C<as_string>
=item * C<raw>
=back
=cut
##############################################################################
=head2 Instance Methods
=head3 C<pragmas>
if ( $result->is_pragma ) {
@pragmas = $result->pragmas;
}
=cut
# Return the pragmas of this token.
# List context: the pragmas as a list. Scalar context: a reference to a
# fresh array, so callers cannot modify the stored list through it.
sub pragmas {
    my ($self) = @_;
    my $copy = [ @{ $self->{pragmas} } ];
    return @$copy if wantarray;
    return $copy;
}
1;
package TAP::Parser::Result::Unknown;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Result;
@ISA = 'TAP::Parser::Result';
use vars qw($VERSION);
=head1 NAME
TAP::Parser::Result::Unknown - Unknown result token.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This is a subclass of L<TAP::Parser::Result>. A token of this class will be
returned if the parser does not recognize the token line. For example:
1..5
VERSION 7
ok 1 - woo hooo!
... woo hooo! is cool!
In the above "TAP", the second and fourth lines will generate "Unknown"
tokens.
=head1 OVERRIDDEN METHODS
Mainly listed here to shut up the pitiful screams of the pod coverage tests.
They keep me awake at night.
=over 4
=item * C<as_string>
=item * C<raw>
=back
=cut
1;
package TAP::Parser::Result::Test;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Result;
@ISA = 'TAP::Parser::Result';
use vars qw($VERSION);
=head1 NAME
TAP::Parser::Result::Test - Test result token.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This is a subclass of L<TAP::Parser::Result>. A token of this class will be
returned if a test line is encountered.
1..1
ok 1 - woo hooo!
=head1 OVERRIDDEN METHODS
This class is the workhorse of the L<TAP::Parser> system. Most TAP lines will
be test lines and if C<< $result->is_test >>, then you have a bunch of methods
at your disposal.
=head2 Instance Methods
=cut
##############################################################################
=head3 C<ok>
my $ok = $result->ok;
Returns the literal text of the C<ok> or C<not ok> status.
=cut
# Accessor: the literal status text stored under the "ok" key.
sub ok {
    my ($self) = @_;
    return $self->{ok};
}
##############################################################################
=head3 C<number>
my $test_number = $result->number;
Returns the number of the test, even if the original TAP output did not supply
that number.
=cut
# Accessor: the test number stored under the "test_num" key.
sub number {
    my ($self) = @_;
    return $self->{test_num};
}
# Private setter for the test number; returns the value that was stored.
sub _number {
    my $self = shift;
    return $self->{test_num} = shift;
}
##############################################################################
=head3 C<description>
my $description = $result->description;
Returns the description of the test, if any. This is the portion after the
test number but before the directive.
=cut
# Accessor: the description text stored for this test line.
sub description {
    my ($self) = @_;
    return $self->{description};
}
##############################################################################
=head3 C<directive>
my $directive = $result->directive;
Returns either C<TODO> or C<SKIP> if either directive was present for a test
line.
=cut
# Accessor: the directive stored for this test line (undef when none).
sub directive {
    my ($self) = @_;
    return $self->{directive};
}
##############################################################################
=head3 C<explanation>
my $explanation = $result->explanation;
If a test had either a C<TODO> or C<SKIP> directive, this method will return
the accompanying explanation, if present.
not ok 17 - 'Pigs can fly' # TODO not enough acid
For the above line, the explanation is I<not enough acid>.
=cut
# Accessor: the explanation text that accompanied the directive.
sub explanation {
    my ($self) = @_;
    return $self->{explanation};
}
##############################################################################
=head3 C<is_ok>
if ( $result->is_ok ) { ... }
Returns a boolean value indicating whether or not the test passed. Remember
that for TODO tests, the test always passes.
If the test is unplanned, this method will always return false. See
C<is_unplanned>.
=cut
# Did this test pass, taking plan and TODO status into account?
#   - unplanned tests never pass (empty return);
#   - tests with a TODO directive always pass (returns 1);
#   - otherwise the test passes when its status text does not contain "not".
sub is_ok {
    my ($self) = @_;
    if ( $self->is_unplanned ) {
        return;
    }
    if ( $self->has_todo ) {
        return 1;
    }
    return $self->ok !~ /not/;
}
##############################################################################
=head3 C<is_actual_ok>
if ( $result->is_actual_ok ) { ... }
Returns a boolean value indicating whether or not the test passed, regardless
of its TODO status.
=cut
# Did the test itself pass, ignoring any TODO directive?
# True when the stored status text does not contain "not".
sub is_actual_ok {
    my ($self) = @_;
    my $status = $self->{ok};
    return $status !~ /not/;
}
##############################################################################
=head3 C<actual_passed>
Deprecated. Please use C<is_actual_ok> instead.
=cut
# Deprecated alias for is_actual_ok: emits a warning, then hands control
# (and the untouched argument list) over to is_actual_ok.
sub actual_passed {
    my $notice = 'actual_passed() is deprecated. Please use "is_actual_ok()"';
    warn $notice;
    goto &is_actual_ok;
}
##############################################################################
=head3 C<todo_passed>
if ( $test->todo_passed ) {
# test unexpectedly succeeded
}
If this is a TODO test and an 'ok' line, this method returns true.
Otherwise, it will always return false (regardless of passing status on
non-todo tests).
This is used to track which tests unexpectedly succeeded.
=cut
# True only for a TODO test whose actual result was a pass, i.e. a test
# that unexpectedly succeeded. Non-TODO tests yield the false has_todo value.
sub todo_passed {
    my ($self) = @_;
    my $is_todo = $self->has_todo;
    return $is_todo unless $is_todo;
    return $self->is_actual_ok;
}
##############################################################################
=head3 C<todo_failed>
# deprecated in favor of 'todo_passed'. This method was horribly misnamed.
This was a badly misnamed method. It indicates which TODO tests unexpectedly
succeeded. Will now issue a warning and call C<todo_passed>.
=cut
# Deprecated, misnamed alias for todo_passed: emits a warning, then hands
# control (and the untouched argument list) over to todo_passed.
sub todo_failed {
    my $notice = 'todo_failed() is deprecated. Please use "todo_passed()"';
    warn $notice;
    goto &todo_passed;
}
##############################################################################
=head3 C<has_skip>
if ( $result->has_skip ) { ... }
Returns a boolean value indicating whether or not this test has a SKIP
directive.
=head3 C<has_todo>
if ( $result->has_todo ) { ... }
Returns a boolean value indicating whether or not this test has a TODO
directive.
=head3 C<as_string>
print $result->as_string;
This method prints the test as a string. It will probably be similar, but
not necessarily identical, to the original test line. Directives are
capitalized, some whitespace may be trimmed and a test number will be added if
it was not present in the original line. If you need the original text of the
test line, use the C<raw> method.
=cut
# Rebuild a test line from its parsed parts:
#   "<status> <number>[ <description>][ # <directive> <explanation>]"
# The description and the directive section are only added when true.
sub as_string {
    my ($self) = @_;

    my $status = $self->ok;
    my $number = $self->number;
    my $text   = "$status $number";

    my $description = $self->description;
    $text .= " $description" if $description;

    my $directive = $self->directive;
    if ($directive) {
        my $why = $self->explanation;
        $text .= " # $directive $why";
    }

    return $text;
}
##############################################################################
=head3 C<is_unplanned>
if ( $test->is_unplanned ) { ... }
$test->is_unplanned(1);
If a test number is greater than the number of planned tests, this method will
return true. Unplanned tests will I<always> return false for C<is_ok>,
regardless of whether or not the test C<has_todo>.
Note that if tests have a trailing plan, it is not possible to set this
property for unplanned tests as we do not know it's unplanned until the plan
is reached:
print <<'END';
ok 1
ok 2
1..1
END
=cut
# Combined getter/setter for the "unplanned" flag.
#   is_unplanned()       - returns the flag, or '' when it is not set/true
#   is_unplanned($value) - stores the boolean form of $value, returns $self
sub is_unplanned {
    my ( $self, @new_value ) = @_;
    if (@new_value) {
        $self->{unplanned} = !!$new_value[0];
        return $self;
    }
    return $self->{unplanned} ? $self->{unplanned} : '';
}
1;
package TAP::Parser::Result::Version;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Result;
@ISA = 'TAP::Parser::Result';
=head1 NAME
TAP::Parser::Result::Version - TAP syntax version token.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This is a subclass of L<TAP::Parser::Result>. A token of this class will be
returned if a version line is encountered.
TAP version 13
ok 1
not ok 2
The first version of TAP to include an explicit version number is 13.
=head1 OVERRIDDEN METHODS
Mainly listed here to shut up the pitiful screams of the pod coverage tests.
They keep me awake at night.
=over 4
=item * C<as_string>
=item * C<raw>
=back
=cut
##############################################################################
=head2 Instance Methods
=head3 C<version>
if ( $result->is_version ) {
print $result->version;
}
This is merely a synonym for C<as_string>.
=cut
# Accessor: the TAP version stored for this token.
sub version {
    my ($self) = @_;
    return $self->{version};
}
1;
package TAP::Parser::Result::YAML;
use strict;
use vars qw($VERSION @ISA);
use TAP::Parser::Result;
@ISA = 'TAP::Parser::Result';
=head1 NAME
TAP::Parser::Result::YAML - YAML result token.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 DESCRIPTION
This is a subclass of L<TAP::Parser::Result>. A token of this class will be
returned if a YAML block is encountered.
1..1
ok 1 - woo hooo!
C<1..1> is the plan. Gotta have a plan.
=head1 OVERRIDDEN METHODS
Mainly listed here to shut up the pitiful screams of the pod coverage tests.
They keep me awake at night.
=over 4
=item * C<as_string>
=item * C<raw>
=back
=cut
##############################################################################
=head2 Instance Methods
=head3 C<data>
if ( $result->is_yaml ) {
print $result->data;
}
Return the parsed YAML data for this result
=cut
# Accessor: the data structure stored under the "data" key for this token.
sub data {
    my ($self) = @_;
    return $self->{data};
}
1;
package TAP::Parser::Scheduler::Job;
use strict;
use vars qw($VERSION);
use Carp;
=head1 NAME
TAP::Parser::Scheduler::Job - A single testing job.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Scheduler::Job;
=head1 DESCRIPTION
Represents a single test 'job'.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $job = TAP::Parser::Scheduler::Job->new(
$name, $desc
);
Returns a new C<TAP::Parser::Scheduler::Job> object.
=cut
# Constructor.
#   $name - stored as "filename"
#   $desc - stored as "description"
#   @ctx  - optional extra values; stored as an array ref under "context"
#           only when at least one is given
sub new {
    my $class = shift;
    my ( $name, $desc, @ctx ) = @_;

    my %job = (
        filename    => $name,
        description => $desc,
    );
    $job{context} = \@ctx if @ctx;

    return bless \%job, $class;
}
=head3 C<on_finish>
Register a closure to be called when this job is finished (see C<finish>).
=cut
# Store the callback that finish() will invoke; returns the callback.
sub on_finish {
    my $self = shift;
    return $self->{on_finish} = shift;
}
=head3 C<finish>
Called when a job is complete to unlock it.
=cut
# Mark the job as complete: if an on_finish callback was registered, call
# it with the job itself as its only argument.
sub finish {
    my ($self) = @_;
    my $cb = $self->{on_finish};
    if ($cb) {
        $cb->($self);
    }
}
=head3 C<filename>
=head3 C<description>
=head3 C<context>
=cut
# Accessor: the file name given to the constructor.
sub filename {
    my ($self) = @_;
    return $self->{filename};
}
# Accessor: the description given to the constructor.
sub description {
    my ($self) = @_;
    return $self->{description};
}
# Return the context values as a list (empty when none were stored).
# In scalar context this yields the number of context values.
sub context {
    my ($self) = @_;
    my $ctx = $self->{context} || [];
    return @$ctx;
}
=head3 C<as_array_ref>
For backwards compatibility in callbacks.
=cut
# Return the job as [ filename, description, context-array-ref ].
# Side effect: when no context is stored yet, an empty array ref is stored
# on the job and that same ref is returned as the third element.
sub as_array_ref {
    my ($self) = @_;
    my @row = ( $self->filename, $self->description );
    $self->{context} ||= [];
    push @row, $self->{context};
    return \@row;
}
=head3 C<is_spinner>
Returns false indicating that this is a real job rather than a
'spinner'. Spinners are returned when the scheduler still has pending
jobs but can't (because of locking) return one right now.
=cut
# A real job is never a spinner: always returns 0.
sub is_spinner {
    return 0;
}
1;
package TAP::Parser::Scheduler::Spinner;
use strict;
use vars qw($VERSION);
use Carp;
=head1 NAME
TAP::Parser::Scheduler::Spinner - A no-op job.
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Scheduler::Spinner;
=head1 DESCRIPTION
A no-op job. Returned by C<TAP::Parser::Scheduler> as an instruction to
the harness to spin (keep executing tests) while the scheduler can't
return a real job.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $job = TAP::Parser::Scheduler::Spinner->new;
Returns a new C<TAP::Parser::Scheduler::Spinner> object.
=cut
# Constructor: an empty hash blessed into the invoking class.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}
=head3 C<is_spinner>
Returns true indicating that this is a 'spinner' job. Spinners are returned
when the scheduler still has pending jobs but can't (because of locking)
return one right now.
=cut
# A spinner always identifies itself as one: always returns 1.
sub is_spinner {
    return 1;
}
1;
package TAP::Parser::Source::Perl;
use strict;
use Config;
use vars qw($VERSION @ISA);
use constant IS_WIN32 => ( $^O =~ /^(MS)?Win32$/ );
use constant IS_VMS => ( $^O eq 'VMS' );
use TAP::Parser::Source;
use TAP::Parser::Utils qw( split_shell );
@ISA = 'TAP::Parser::Source';
=head1 NAME
TAP::Parser::Source::Perl - Stream Perl output
=head1 VERSION
Version 3.17
=cut
$VERSION = '3.17';
=head1 SYNOPSIS
use TAP::Parser::Source::Perl;
my $perl = TAP::Parser::Source::Perl->new;
my $stream = $perl->source( [ $filename, @args ] )->get_stream;
=head1 DESCRIPTION
Takes a filename and hopefully returns a stream from it. The filename should
be the name of a Perl program.
Note that this is a subclass of L<TAP::Parser::Source>. See that module for
more methods.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $perl = TAP::Parser::Source::Perl->new;
Returns a new C<TAP::Parser::Source::Perl> object.
=head2 Instance Methods
=head3 C<source>
Getter/setter for the name of the test program and any arguments it requires.
my ($filename, @args) = @{ $perl->source };
$perl->source( [ $filename, @args ] );
C<croak>s if C<$filename> could not be found.
=cut
# Getter/setter for the [ $filename, @args ] array ref.
# When a value is supplied, its first element must name an existing plain
# file, otherwise _croak is called. The actual storage is delegated to the
# parent class's source().
sub source {
    my $self = shift;
    if ( @_ && !-f $_[0][0] ) {
        $self->_croak("Cannot find ($_[0][0])");
    }
    return $self->SUPER::source(@_);
}
=head3 C<switches>
my $switches = $perl->switches;
my @switches = $perl->switches;
$perl->switches( \@switches );
Getter/setter for the additional switches to pass to the perl executable. One
common switch would be to set an include directory:
$perl->switches( ['-Ilib'] );
=cut
# Getter/setter for the extra perl switches.
#   switches()         - list context: the switches as a list;
#                        scalar context: the stored array ref itself
#   switches(\@values) - stores a copy of @values and returns $self
sub switches {
    my ( $self, @args ) = @_;

    if (@args) {
        my ($new) = @args;
        $self->{switches} = [@$new];    # copy, so later caller edits don't leak in
        return $self;
    }

    my $current = $self->{switches};
    return wantarray ? @$current : $current;
}
##############################################################################
=head3 C<get_stream>
my $stream = $source->get_stream($parser);
Returns a stream of the output generated by executing C<source>. Must be
passed an object that implements a C<make_iterator> method. Typically
this is a TAP::Parser instance.
=cut
# Build the command line for the test script and hand it to the factory.
#
#   $factory - object whose make_iterator method receives a hash ref with
#              the keys command, merge, setup and teardown.
#
# Returns whatever $factory->make_iterator returns. Calls _croak with
# "No command found!" when no command could be assembled.
sub get_stream {
my ( $self, $factory ) = @_;
my @switches = $self->_switches;
my $path_sep = $Config{path_sep};
# NOTE(review): $path_sep is interpolated into the pattern unescaped. That
# is harmless for ':' and ';' -- confirm behaviour on platforms whose
# separator is a regex metacharacter.
my $path_pat = qr{$path_sep};
# Filter out any -I switches to be handled as libs later.
#
# Nasty kludge. It might be nicer if we got the libs separately
# although at least this way we find any -I switches that were
# supplied other than as explicit libs.
#
# Switches that contain the path separator are left on the command line
# because they would break PERL5LIB.
my @libs;
my @filtered_switches;
for (@switches) {
# An optionally quoted -I switch: capture the (unquoted) directory.
if ( !/$path_pat/ && / ^ ['"]? -I ['"]? (.*?) ['"]? $ /x ) {
push @libs, $1;
}
else {
push @filtered_switches, $_;
}
}
@switches = @filtered_switches;
# setup: put the collected libs in front of any existing PERL5LIB value.
# The grep drops an undefined PERL5LIB so no empty entry is joined in.
my $setup = sub {
if (@libs) {
$ENV{PERL5LIB}
= join( $path_sep, grep {defined} @libs, $ENV{PERL5LIB} );
}
};
# Cargo culted from comments seen elsewhere about VMS / environment
# variables. I don't know if this is actually necessary.
#
# teardown: restore PERL5LIB to the value it had when get_stream was
# called, deleting it if it was not set at that point.
my $previous = $ENV{PERL5LIB};
my $teardown = sub {
if ( defined $previous ) {
$ENV{PERL5LIB} = $previous;
}
else {
delete $ENV{PERL5LIB};
}
};
# Taint mode ignores environment variables so we must retranslate
# PERL5LIB as -I switches and place PERL5OPT on the command line
# in order that it be seen.
if ( grep { $_ eq "-T" || $_ eq "-t" } @switches ) {
push @switches, $self->_libs2switches(@libs);
push @switches, split_shell( $ENV{PERL5OPT} );
}
my @command = $self->_get_command_for_switches(@switches)
or $self->_croak("No command found!");
return $factory->make_iterator(
{ command => \@command,
merge => $self->merge,
setup => $setup,
teardown => $teardown,
}
);
}
# Assemble the command as a list of atoms:
#   ( perl executable, @switches, script file, script arguments )
# The script file and its arguments come from $self->source, the perl
# executable from $self->_get_perl.
sub _get_command_for_switches {
    my ( $self, @switches ) = @_;
    my ( $file, @args ) = @{ $self->source };
    my $perl = $self->_get_perl;

# XXX we never need to quote if we treat the parts as atoms (except maybe vms)
#$file = qq["$file"] if ( $file =~ /\s/ ) && ( $file !~ /^".*"$/ );
    my @command = ( $perl, @switches, $file, @args );
    return @command;
}
# Convenience wrapper: the full command built from this object's own
# switches (as computed by _switches).
sub _get_command {
    my ($self) = @_;
    my @switches = $self->_switches;
    return $self->_get_command_for_switches(@switches);
}
# Turn library directories into "-I<dir>" switches, skipping false entries
# (undef, empty string, "0").
sub _libs2switches {
    my ( $self, @libs ) = @_;
    my @switches;
    for my $lib (@libs) {
        next unless $lib;
        push @switches, "-I$lib";
    }
    return @switches;
}
=head3 C<shebang>
Get the shebang line for a script file.
my $shebang = TAP::Parser::Source::Perl->shebang( $some_script );
May be called as a class method
=cut
{

    # Global shebang cache: file name => first line of that file (or undef
    # when the file could not be read).
    my %shebang_for;

    # Read and return the first line of $file.
    # Returns undef when the file cannot be opened (a message is printed)
    # or when it contains no lines.
    sub _read_shebang {
        my $file = shift;
        my $shebang;

        # Three-argument open with a lexical handle: the file name is taken
        # literally, so leading/trailing whitespace or characters such as
        # '>' and '|' in the name are never interpreted as an open mode.
        if ( open( my $fh, '<', $file ) ) {
            $shebang = <$fh>;
            close($fh) or print "Can't close $file. $!\n";
        }
        else {
            print "Can't open $file. $!\n";
        }
        return $shebang;
    }

    # Return the first line of $file, reading the file only the first time
    # it is asked for; later calls are answered from the cache (this
    # includes a cached undef for files that could not be read).
    # May be called as a class or instance method; the invocant is unused.
    sub shebang {
        my ( $class, $file ) = @_;
        unless ( exists $shebang_for{$file} ) {
            $shebang_for{$file} = _read_shebang($file);
        }
        return $shebang_for{$file};
    }
}
=head3 C<get_taint>
Decode any taint switches from a Perl shebang line.
# $taint will be 't'
my $taint = TAP::Parser::Source::Perl->get_taint( '#!/usr/bin/perl -t' );
# $untaint will be undefined
my $untaint = TAP::Parser::Source::Perl->get_taint( '#!/usr/bin/perl' );
=cut
# Extract the taint switch letters from a perl shebang line.
# Returns the captured run of "T"/"t" characters, or nothing (empty list /
# undef) when the line is undefined or carries no taint switch.
sub get_taint {
    my ( $class, $shebang ) = @_;
    return if !defined $shebang;
    if ( $shebang =~ /^#!.*\bperl.*\s-\w*([Tt]+)/ ) {
        return $1;
    }
    return;
}
# Compute the switches to run the test script with:
#   - the switches configured on this object ($self->switches), plus
#   - "-T"/"-t" (as found by get_taint) when the script's shebang line
#     asks for taint mode.
# On VMS every switch is wrapped in double quotes.
# Returns the switches as a list.
sub _switches {
    my $self = shift;
    my ($file) = @{ $self->source };
    my @switches = ( $self->switches, );

    # A script whose first line cannot be read (empty or unreadable file)
    # simply has no taint switch to add. The configured switches must still
    # be returned rather than being silently dropped.
    my $shebang = $self->shebang($file);
    if ( defined $shebang ) {
        my $taint = $self->get_taint($shebang);
        push @switches, "-$taint" if defined $taint;
    }

    # Quote the argument if we're VMS, since VMS will downcase anything
    # not quoted.
    if (IS_VMS) {
        for (@switches) {
            $_ = qq["$_"];
        }
    }
    return @switches;
}
# Decide which perl executable runs the test script:
#   1. $ENV{HARNESS_PERL} when it is defined;
#   2. on Win32, the short path name of the running perl ($^X);
#   3. otherwise $^X itself.
sub _get_perl {
    my ($self) = @_;

    my $override = $ENV{HARNESS_PERL};
    if ( defined $override ) {
        return $override;
    }
    if (IS_WIN32) {
        return Win32::GetShortPathName($^X);
    }
    return $^X;
}
1;
=head1 SUBCLASSING
Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
=head2 Example
package MyPerlSource;
use strict;
use vars '@ISA';
use Carp qw( croak );
use TAP::Parser::Source::Perl;
@ISA = qw( TAP::Parser::Source::Perl );
sub source {
my ($self, $args) = @_;
if ($args) {
$self->{file} = $args->[0];
return $self->SUPER::source($args);
}
return $self->SUPER::source;
}
# use the version of perl from the shebang line in the test file
sub _get_perl {
my $self = shift;
if (my $shebang = $self->shebang( $self->{file} )) {
$shebang =~ /^#!(.*\bperl.*?)(?:(?:\s)|(?:$))/;
return $1 if $1;
}
return $self->SUPER::_get_perl(@_);
}
=head1 SEE ALSO
L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Source>,
=cut
package TAP::Parser::YAMLish::Reader;
use strict;
use vars qw($VERSION @ISA);
use TAP::Object ();
@ISA = 'TAP::Object';
$VERSION = '3.17';
# TODO:
# Handle blessed object syntax
# Printable characters for escapes
# Escape letter (the character after a backslash in a double-quoted
# string) => the character it stands for.
my %UNESCAPES = (
z => "\x00", a => "\x07", t => "\x09",
n => "\x0a", v => "\x0b", f => "\x0c",
r => "\x0d", e => "\x1b", '\\' => '\\',
);
# A double-quoted string: backslash-escaped characters or anything that is
# not a double quote, between two double quotes.
my $QQ_STRING = qr{ " (?:\\. | [^"])* " }x;
# A "key: value" line. $1 is the key (a double-quoted string or a run of
# non-space characters); $2 is the optional value with surrounding
# whitespace removed.
my $HASH_LINE = qr{ ^ ($QQ_STRING|\S+) \s* : \s* (?: (.+?) \s* )? $ }x;
# A line that can start a hash entry: begins with a word character or a quote.
my $IS_HASH_KEY = qr{ ^ [\w\'\"] }x;
# The "..." document terminator, optionally followed by whitespace.
my $IS_END_YAML = qr{ ^ \.\.\. \s* $ }x;
# A string that consists of exactly one double-quoted string.
my $IS_QQ_STRING = qr{ ^ $QQ_STRING $ }x;
# new() implementation supplied by TAP::Object
# Read one YAMLish document.
#   $source - code reference; each call must return the next input line
#             (dies if it is not a code reference)
# Returns the decoded data structure, or nothing when the very first line
# is false (e.g. undef at end of input). Dies when the document is not
# followed by a "..." terminator line.
sub read {
    my ( $self, $source ) = @_;
    if ( ref($source) ne 'CODE' ) {
        die "Must have a code reference to read input from";
    }

    @{$self}{qw( reader capture )} = ( $source, [] );

    # Prime the reader
    $self->_next;
    if ( !$self->{next} ) {
        return;
    }

    my $document = $self->_read;

    # The terminator is mandatory otherwise we'd consume a line from the
    # iterator that doesn't belong to us. If we want to remove this
    # restriction we'll have to implement look-ahead in the iterators.
    # Which might not be a bad idea.
    my $terminator = $self->_peek;
    if ( !defined $terminator || $terminator !~ $IS_END_YAML ) {
        die "Missing '...' at end of YAMLish";
    }

    delete @{$self}{qw( reader next )};
    return $document;
}
# Return the captured input lines (undefined entries skipped) joined with
# newlines and followed by one trailing newline.
sub get_raw {
    my ($self) = @_;
    my @lines = grep { defined } @{ $self->{capture} || [] };
    return join( "\n", @lines ) . "\n";
}
# Look at the pending input line without consuming it.
#   scalar context - the raw pending line (undef at end of input)
#   list context   - ( line with leading whitespace removed, indent width )
#
# At end of input the list form returns ( undef, 0 ). Callers test
# "!defined $line" to detect exhausted input, so an undefined pending line
# must not be run through the regex (which would turn it into '').
sub _peek {
    my $self = shift;
    return $self->{next} unless wantarray;

    my $line = $self->{next};
    return ( undef, 0 ) unless defined $line;

    $line =~ /^ (\s*) (.*) $ /x;
    return ( $2, length $1 );
}
# Advance to the next input line: fetch it from the reader callback, make
# it the pending line and append it to the capture buffer.
# Dies when no reader is installed. Returns the result of the push.
sub _next {
    my ($self) = @_;
    my $reader = $self->{reader}
      or die "_next called with no reader";
    my $line = $self->{next} = $reader->();
    return push @{ $self->{capture} }, $line;
}
# Parse a whole document, starting at its "---" header line.
# An inline value on the header line is decoded as a scalar; otherwise the
# following line decides between array and hash. Dies when the header is
# missing, the document ends immediately, or the syntax is not supported.
sub _read {
    my ($self) = @_;
    my $header = $self->_peek;

    # Do we have a document header? On a match @inline holds the optional
    # inline value (undef when there is none).
    my @inline = $header =~ /^ --- (?: \s* (.+?) \s* )? $/x
      or die "YAMLish document header not found";

    $self->_next;
    return $self->_read_scalar( $inline[0] ) if defined $inline[0];    # Inline?

    my ( $text, $depth ) = $self->_peek;
    return $self->_read_array($depth) if $text =~ /^ - /x;
    return $self->_read_hash( $text, $depth ) if $text =~ $IS_HASH_KEY;
    die "Premature end of YAMLish" if $text =~ $IS_END_YAML;
    die "Unsupported YAMLish syntax: '$text'";
}
# Parse a double quoted string
# Decode a double-quoted string: strip the surrounding quotes, turn \"
# into ", and expand backslash escapes (single-letter escapes via
# %UNESCAPES, \xHH via its hex value). Dies if the argument is not wrapped
# in double quotes.
sub _read_qq {
    my ( $self, $text ) = @_;

    $text =~ s/^ " (.*?) " $/$1/x
      or die "Internal: not a quoted string";

    $text =~ s/\\"/"/gx;
    $text =~ s/ \\ ( [tartan\\favez] | x([0-9a-fA-F]{2}) )
/ (length($1) > 1) ? pack("H2", $2) : $UNESCAPES{$1} /gex;

    return $text;
}
# Parse a scalar string to the actual scalar
# Parse a scalar string to the actual scalar.
#   '~'            -> undef
#   '{}' / '[]'    -> empty hash / array reference
#   '>' or '|'     -> multi-line scalar read from the following input lines
#                     ('>' joins lines with a space, '|' with newlines and
#                     keeps indentation beyond the first line's)
#   '...'          -> single-quoted string, '' un-doubled to '
#   "..."          -> double-quoted string, decoded by _read_qq
#   anything else  -> returned unchanged
# Dies on a quoted string that is not closed on the same line, and when a
# '>' / '|' marker is not followed by any content.
sub _read_scalar {
    my $self   = shift;
    my $string = shift;

    # "return undef" is deliberate: list-context callers push the result
    # and need exactly one element.
    return undef if $string eq '~';
    return {} if $string eq '{}';
    return [] if $string eq '[]';

    if ( $string eq '>' || $string eq '|' ) {

        # Scalar-context _peek yields the raw pending line, which is undef
        # once the input is exhausted; that is the only reliable
        # end-of-input test here.
        my $pending = $self->_peek;
        die "Multi-line scalar content missing" unless defined $pending;

        my ( $line, $indent ) = $self->_peek;
        my @multiline = ($line);

        while (1) {
            $self->_next;

            # Stop at end of input. Without this the loop never ends when
            # the content sits at indent 0, because "$ind < $indent" can
            # then never become true.
            $pending = $self->_peek;
            last unless defined $pending;

            my ( $next, $ind ) = $self->_peek;
            last if $ind < $indent;

            my $pad = $string eq '|' ? ( ' ' x ( $ind - $indent ) ) : '';
            push @multiline, $pad . $next;
        }

        return join( ( $string eq '>' ? ' ' : "\n" ), @multiline ) . "\n";
    }

    if ( $string =~ /^ ' (.*) ' $/x ) {
        ( my $rv = $1 ) =~ s/''/'/g;
        return $rv;
    }

    if ( $string =~ $IS_QQ_STRING ) {
        return $self->_read_qq($string);
    }

    if ( $string =~ /^['"]/ ) {

        # A quote with folding... we don't support that
        die __PACKAGE__ . " does not support multi-line quoted scalars";
    }

    # Regular unquoted string
    return $string;
}
# Parse the nested structure that starts at the pending line: an array
# when the line starts with "-", a hash when it looks like a hash key.
# Dies on anything else.
sub _read_nested {
    my ($self) = @_;
    my ( $text, $depth ) = $self->_peek;

    return $self->_read_array($depth) if $text =~ /^ -/x;
    return $self->_read_hash( $text, $depth ) if $text =~ $IS_HASH_KEY;

    die "Unsupported YAMLish syntax: '$text'";
}
# Parse an array
# Parse an array whose "-" items sit at indent $limit.
# Reads items until the pending line is indented less than $limit, is
# undefined, or is the "..." terminator. Returns an array reference.
# Dies on an over-indented line or unsupported syntax.
sub _read_array {
my ( $self, $limit ) = @_;
my $ar = [];
while (1) {
my ( $line, $indent ) = $self->_peek;
last
if $indent < $limit
|| !defined $line
|| $line =~ $IS_END_YAML;
if ( $indent > $limit ) {
die "Array line over-indented";
}
# "- key: value": a hash that starts on the item line. The hash's indent
# is the item's indent plus the width of the "- " marker; the marker is
# stripped before the line is handed to _read_hash (which advances the
# input itself).
if ( $line =~ /^ (- \s+) \S+ \s* : (?: \s+ | $ ) /x ) {
$indent += length $1;
$line =~ s/-\s+//;
push @$ar, $self->_read_hash( $line, $indent );
}
# "- value": a scalar item on the same line.
elsif ( $line =~ /^ - \s* (.+?) \s* $/x ) {
die "Unexpected start of YAMLish" if $line =~ /^---/;
$self->_next;
push @$ar, $self->_read_scalar($1);
}
# "-" alone: the item's value is a nested structure on the following lines.
elsif ( $line =~ /^ - \s* $/x ) {
$self->_next;
push @$ar, $self->_read_nested;
}
# A hash key line without a leading "-".
# NOTE(review): this branch calls _next before _read_hash, unlike the
# "- key: value" branch above -- confirm that advancing here is intended.
elsif ( $line =~ $IS_HASH_KEY ) {
$self->_next;
push @$ar, $self->_read_hash( $line, $indent, );
}
else {
die "Unsupported YAMLish syntax: '$line'";
}
}
return $ar;
}
# Parse a hash.
#   $line  - the first "key: value" line, leading whitespace already removed
#   $limit - indent of the hash's keys
# Reads entries until the pending line is indented less than $limit, is
# undefined, or is the "..." terminator. Returns a hash reference.
# Dies on a line that does not match $HASH_LINE.
sub _read_hash {
my ( $self, $line, $limit ) = @_;
my $indent;
my $hash = {};
while (1) {
die "Badly formed hash line: '$line'"
unless $line =~ $HASH_LINE;
# $1 is the key (decoded like any scalar), $2 the optional inline value.
my ( $key, $value ) = ( $self->_read_scalar($1), $2 );
# Move past the key line before reading the value.
$self->_next;
if ( defined $value ) {
$hash->{$key} = $self->_read_scalar($value);
}
else {
# No inline value: the value is a nested structure on the following lines.
$hash->{$key} = $self->_read_nested;
}
( $line, $indent ) = $self->_peek;
last
if $indent < $limit
|| !defined $line
|| $line =~ $IS_END_YAML;
}
return $hash;
}
1;
__END__
=pod
=head1 NAME
TAP::Parser::YAMLish::Reader - Read YAMLish data from iterator
=head1 VERSION
Version 3.17
=head1 SYNOPSIS
=head1 DESCRIPTION
Note that parts of this code were derived from L<YAML::Tiny> with the
permission of Adam Kennedy.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
The constructor C<new> creates and returns an empty
C<TAP::Parser::YAMLish::Reader> object.
my $reader = TAP::Parser::YAMLish::Reader->new;
=head2 Instance Methods
=head3 C<read>
my $got = $reader->read($stream);
Read YAMLish from a L<TAP::Parser::Iterator> and return the data structure it
represents.
=head3 C<get_raw>
my $source = $reader->get_raw;
Return the raw YAMLish source from the most recent C<read>.
=head1 AUTHOR
Andy Armstrong, <andy@hexten.net>
Adam Kennedy wrote L<YAML::Tiny> which provided the template and many of
the YAML matching regular expressions for this module.
=head1 SEE ALSO
L<YAML::Tiny>, L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>,
L<http://use.perl.org/~Alias/journal/29427>
=head1 COPYRIGHT
Copyright 2007-2008 Andy Armstrong.
Portions copyright 2006-2008 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut
package TAP::Parser::YAMLish::Writer;
use strict;
use vars qw($VERSION @ISA);
use TAP::Object ();
@ISA = 'TAP::Object';
$VERSION = '3.17';
# Characters that force a value to be written as a double-quoted string:
# control characters and the double quote.
# NOTE(review): under a single /x, whitespace inside a bracketed character
# class is still literal, so this class also matches a space -- confirm
# that this is intended.
my $ESCAPE_CHAR = qr{ [ \x00-\x1f \" ] }x;
# Same test for hash keys, which are additionally quoted when they start
# with a non-word character.
my $ESCAPE_KEY = qr{ (?: ^\W ) | $ESCAPE_CHAR }x;
# Escape name (without the leading backslash) for each control character,
# indexed by the character's ordinal value 0x00-0x1f.
my @UNPRINTABLE = qw(
z x01 x02 x03 x04 x05 x06 a
x08 t n v f r x0e x0f
x10 x11 x12 x13 x14 x15 x16 x17
x18 x19 x1a e x1c x1d x1e x1f
);
# new() implementation supplied by TAP::Object
# Encode $obj and send it to $out, framed by "---" and "...".
#   $obj - the data to encode (an argument is required, it may be undef)
#   $out - where to write to; must be a reference; defaults to \*STDOUT
# Dies when called without data or with a non-reference destination.
# Returns the value removed from $self->{writer} once writing is done.
sub write {
    my ( $self, @args ) = @_;
    die "Need something to write"
      unless @args;

    my ( $obj, $out ) = @args;
    $out ||= \*STDOUT;
    die "Need a reference to something I can write to"
      unless ref $out;

    $self->{writer} = $self->_make_writer($out);
    $self->_write_obj( '---', $obj );
    $self->_put('...');

    return delete $self->{writer};
}
# Turn an output destination into a code reference that accepts one line
# (without trailing newline) per call.
#   code ref    - used as is
#   array ref   - each line is pushed onto the array
#   scalar ref  - each line plus "\n" is appended to the string
#   file handle - each line plus "\n" is printed; accepts a glob reference
#                 or any object derived from IO::Handle (IO::File, ...)
# Dies for any other kind of destination.
sub _make_writer {
    my $self = shift;
    my $out  = shift;
    my $ref  = ref $out;

    if ( 'CODE' eq $ref ) {
        return $out;
    }
    elsif ( 'ARRAY' eq $ref ) {
        return sub { push @$out, shift };
    }
    elsif ( 'SCALAR' eq $ref ) {
        return sub { $$out .= shift() . "\n" };
    }

    # UNIVERSAL::isa is called as a function so that it is safe on any
    # reference; $ref guards against plain strings naming a class.
    elsif ( 'GLOB' eq $ref
        || ( $ref && UNIVERSAL::isa( $out, 'IO::Handle' ) ) )
    {
        return sub { print {$out} shift(), "\n" };
    }

    die "Can't write to $out";
}
# Emit one output line: the arguments are concatenated and passed to the
# current writer callback.
sub _put {
    my ( $self, @parts ) = @_;
    my $line = join '', @parts;
    return $self->{writer}->($line);
}
# Encode one scalar for output.
#   $val  - the value; undef is written as '~'
#   $rule - regex; a value matching it is written double-quoted, with
#           backslashes, double quotes and control characters escaped
# A value that does not match $rule but is empty or contains whitespace is
# written single-quoted, with every embedded single quote doubled.
# Anything else is returned unchanged.
sub _enc_scalar {
    my $self = shift;
    my $val  = shift;
    my $rule = shift;

    return '~' unless defined $val;

    if ( $val =~ /$rule/ ) {
        $val =~ s/\\/\\\\/g;
        $val =~ s/"/\\"/g;
        $val =~ s/ ( [\x00-\x1f] ) / '\\' . $UNPRINTABLE[ ord($1) ] /gex;
        return qq{"$val"};
    }

    if ( length($val) == 0 or $val =~ /\s/ ) {

        # Double *every* single quote (/g): a single-quoted string is read
        # back by collapsing each '' to ', so a lone undoubled quote would
        # end the string early.
        $val =~ s/'/''/g;
        return "'$val'";
    }

    return $val;
}
# Write $obj, recursively, one output line per _put call.
#   $prefix - text that introduces the value ("---", "key:" or "-")
#   $obj    - scalar, hash reference or array reference
#   $indent - nesting depth; children are padded by this many spaces
#             (default 0)
# Scalars go on the prefix line. Non-empty hashes (keys sorted) and arrays
# put the prefix on its own line followed by their children; empty ones
# are written as "{}" / "[]". Dies for any other reference type.
sub _write_obj {
    my ( $self, $prefix, $obj, $indent ) = @_;
    $indent ||= 0;

    my $type = ref $obj;

    if ( !$type ) {
        $self->_put( $prefix, ' ', $self->_enc_scalar( $obj, $ESCAPE_CHAR ) );
    }
    elsif ( 'HASH' eq $type ) {
        if ( !keys %$obj ) {
            $self->_put( $prefix, ' {}' );
        }
        else {
            my $pad = ' ' x $indent;
            $self->_put($prefix);
            foreach my $key ( sort keys %$obj ) {
                my $label
                  = $pad . $self->_enc_scalar( $key, $ESCAPE_KEY ) . ':';
                $self->_write_obj( $label, $obj->{$key}, $indent + 1 );
            }
        }
    }
    elsif ( 'ARRAY' eq $type ) {
        if ( !@$obj ) {
            $self->_put( $prefix, ' []' );
        }
        else {
            my $pad = ' ' x $indent;
            $self->_put($prefix);
            foreach my $item (@$obj) {
                $self->_write_obj( $pad . '-', $item, $indent + 1 );
            }
        }
    }
    else {
        die "Don't know how to encode $type";
    }
}
1;
__END__
=pod
=head1 NAME
TAP::Parser::YAMLish::Writer - Write YAMLish data
=head1 VERSION
Version 3.17
=head1 SYNOPSIS
use TAP::Parser::YAMLish::Writer;

my $data = {
one => 1,
two => 2,
three => [ 1, 2, 3 ],
};

my $yw = TAP::Parser::YAMLish::Writer->new;

# Write to an array...
$yw->write( $data, \@some_array );

# ...an open file handle...
$yw->write( $data, $some_file_handle );

# ...a string ...
$yw->write( $data, \$some_string );

# ...or a closure
$yw->write( $data, sub {
my $line = shift;
print "$line\n";
} );
=head1 DESCRIPTION
Encodes a scalar, hash reference or array reference as YAMLish.
=head1 METHODS
=head2 Class Methods
=head3 C<new>
my $writer = TAP::Parser::YAMLish::Writer->new;
The constructor C<new> creates and returns an empty
C<TAP::Parser::YAMLish::Writer> object.
=head2 Instance Methods
=head3 C<write>
$writer->write($obj, $output );
Encode a scalar, hash reference or array reference as YAML.
my $writer = sub {
my $line = shift;
print SOMEFILE "$line\n";
};

my $data = {
one => 1,
two => 2,
three => [ 1, 2, 3 ],
};

my $yw = TAP::Parser::YAMLish::Writer->new;
$yw->write( $data, $writer );
The C< $output > argument may be:
=over
=item * a reference to a scalar to append YAML to
=item * the handle of an open file
=item * a reference to an array into which YAML will be pushed
=item * a code reference
=back
If you supply a code reference the subroutine will be called once for
each line of output with the line as its only argument. Passed lines
will have no trailing newline.
=head1 AUTHOR
Andy Armstrong, <andy@hexten.net>
=head1 SEE ALSO
L<YAML::Tiny>, L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>,
L<http://use.perl.org/~Alias/journal/29427>
=head1 COPYRIGHT
Copyright 2007-2008 Andy Armstrong.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut
# Term::ANSIColor -- Color screen output using ANSI escape sequences.
#
# Copyright 1996, 1997, 1998, 2000, 2001, 2002, 2005, 2006, 2008, 2009
# Russ Allbery <rra@stanford.edu> and Zenin
# PUSH/POP support submitted 2007 by openmethods.com voice solutions
#
# This program is free software; you may redistribute it and/or modify it
# under the same terms as Perl itself.
#
# Ah, September, when the sysadmins turn colors and fall off the trees....
# -- Dave Van Domelen
##############################################################################
# Modules and declarations
##############################################################################
package Term::ANSIColor;
require 5.001;
$VERSION = '2.00';
use strict;
use vars qw($AUTOLOAD $AUTOLOCAL $AUTORESET @COLORLIST @COLORSTACK $EACHLINE
@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION %ATTRIBUTES
%ATTRIBUTES_R);
use Exporter ();
# Export setup, done at compile time.
#   default exports:  color, colored
#   on request:       uncolor, plus everything in the "pushpop" tag
#   :constants tag:   the upper-case attribute names in @COLORLIST
#   :pushpop tag:     @COLORLIST plus PUSHCOLOR, POPCOLOR and LOCALCOLOR
BEGIN {
@COLORLIST = qw(CLEAR RESET BOLD DARK UNDERLINE UNDERSCORE BLINK REVERSE
CONCEALED BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE
ON_BLACK ON_RED ON_GREEN ON_YELLOW ON_BLUE ON_MAGENTA
ON_CYAN ON_WHITE);
@ISA = qw(Exporter);
@EXPORT = qw(color colored);
@EXPORT_OK = qw(uncolor);
%EXPORT_TAGS = (constants => \@COLORLIST,
pushpop => [ @COLORLIST,
qw(PUSHCOLOR POPCOLOR LOCALCOLOR) ]);
# Adds every name in the "pushpop" tag to @EXPORT_OK.
Exporter::export_ok_tags ('pushpop');
}
##############################################################################
# Internal data structures
##############################################################################
# Map each lower-case attribute name to the numeric code that is placed
# between "\e[" and "m" in the escape sequence.  Some names are synonyms for
# the same code (clear/reset, dark/faint, underline/underscore).
%ATTRIBUTES = (
    # Text attributes.
    'clear'      => 0,
    'reset'      => 0,
    'bold'       => 1,
    'dark'       => 2,
    'faint'      => 2,
    'underline'  => 4,
    'underscore' => 4,
    'blink'      => 5,
    'reverse'    => 7,
    'concealed'  => 8,

    # Foreground colors.
    'black'      => 30,
    'red'        => 31,
    'green'      => 32,
    'yellow'     => 33,
    'blue'       => 34,
    'magenta'    => 35,
    'cyan'       => 36,
    'white'      => 37,

    # Background colors.
    'on_black'   => 40,
    'on_red'     => 41,
    'on_green'   => 42,
    'on_yellow'  => 43,
    'on_blue'    => 44,
    'on_magenta' => 45,
    'on_cyan'    => 46,
    'on_white'   => 47,
);

# Reverse lookup from numeric code to name.  The names are stored in reverse
# alphabetical order, so when several names share a code the last one stored
# -- the alphabetically first name -- is the one that is kept.
{
    my @names = reverse sort keys %ATTRIBUTES;
    @ATTRIBUTES_R{ @ATTRIBUTES{@names} } = @names;
}
##############################################################################
# Implementation (constant form)
##############################################################################
# Time to have fun! We now want to define the constant subs, which are named
# the same as the attributes above but in all caps. Each constant sub needs
# to act differently depending on whether $AUTORESET is set. Without
# autoreset:
#
# BLUE "text\n" ==> "\e[34mtext\n"
#
# If $AUTORESET is set, we should instead get:
#
# BLUE "text\n" ==> "\e[34mtext\n\e[0m"
#
# The sub also needs to handle the case where it has no arguments correctly.
# Maintaining all of this as separate subs would be a major nightmare, as well
# as duplicate the %ATTRIBUTES hash, so instead we define an AUTOLOAD sub to
# define the constant subs on demand. To do that, we check the name of the
# called sub against the list of attributes, and if it's an all-caps version
# of one of them, we define the sub on the fly and then run it.
#
# If the environment variable ANSI_COLORS_DISABLED is set, just return the
# arguments without adding any escape sequences. This is to make it easier to
# write scripts that also work on systems without any ANSI support, like
# Windows consoles.
# Catch calls to subs that are not yet defined.  If the called name is the
# upper-case form of a key in %ATTRIBUTES, define the constant sub on the fly
# and jump to it; otherwise croak.
sub AUTOLOAD {
# With ANSI_COLORS_DISABLED set, pass the arguments through uncolored.  No
# sub is defined in this case and the called name is not validated.
if (defined $ENV{ANSI_COLORS_DISABLED}) {
return join ('', @_);
}
# Strip the package qualifier to get the bare sub name.
my $sub;
($sub = $AUTOLOAD) =~ s/^.*:://;
my $attr = $ATTRIBUTES{lc $sub};
# Only names made solely of capital letters and underscores that correspond
# to a known attribute are turned into constant subs.
if ($sub =~ /^[A-Z_]+$/ && defined $attr) {
$attr = "\e[" . $attr . 'm';
# Build the sub from source text.  $AUTOLOAD and $attr are interpolated
# now; the backslashed \$AUTORESET, \$AUTOLOCAL and \@_ are left for the
# generated sub to evaluate on each call.
eval qq {
sub $AUTOLOAD {
if (\$AUTORESET && \@_) {
return '$attr' . join ('', \@_) . "\e[0m";
} elsif (\$AUTOLOCAL && \@_) {
return PUSHCOLOR ('$attr') . join ('', \@_) . POPCOLOR;
} else {
return '$attr' . join ('', \@_);
}
}
};
# Re-dispatch to the newly defined sub with the current @_.
goto &$AUTOLOAD;
} else {
require Carp;
Carp::croak ("undefined subroutine &$AUTOLOAD called");
}
}
# Push the color escape sequences that begin the given text onto the color
# stack and return the text itself (not the stack entry).
#
# All arguments are joined into a single string, matching the behavior of the
# constant subs, POPCOLOR, and LOCALCOLOR; previously only the first argument
# was kept and any further arguments were silently dropped.
#
# Each stack entry holds the previous top of the stack followed by the newly
# extracted sequences, so the top entry alone is enough to restore the full
# set of attributes in POPCOLOR.
sub PUSHCOLOR {
    my $text = join ('', @_);

    # Leading run of "\e[...m" sequences.  If the text does not start with
    # one, push an empty addition rather than undef so that later
    # concatenations in PUSHCOLOR and POPCOLOR stay well defined.
    my ($color) = ($text =~ m/^((?:\e\[[\d;]+m)+)/);
    $color = '' unless defined $color;

    if (@COLORSTACK) {
        $color = $COLORSTACK[-1] . $color;
    }
    push (@COLORSTACK, $color);
    return $text;
}
# Discard the top of the color stack.  Returns the arguments joined together
# and prefixed with the escape sequences of the new top of the stack, or, if
# the stack is now empty, whatever RESET returns for the same arguments.
sub POPCOLOR {
    pop @COLORSTACK;
    return RESET (@_) unless @COLORSTACK;
    return $COLORSTACK[-1] . join ('', @_);
}
# Wrap the joined arguments in a PUSHCOLOR / POPCOLOR pair: the text is
# pushed first, then the stack is popped with no text, and the two results
# are concatenated.
sub LOCALCOLOR {
    my $pushed = PUSHCOLOR (join ('', @_));
    my $popped = POPCOLOR ();
    return $pushed . $popped;
}
##############################################################################
# Implementation (attribute string form)
##############################################################################
# Return the escape sequence that sets the given attributes.  Every argument
# is a whitespace-separated list of attribute names; case is ignored.
# Returns the empty string when ANSI_COLORS_DISABLED is set and undef when no
# attribute names were given.  Croaks on an unknown attribute name.
sub color {
    return '' if defined $ENV{ANSI_COLORS_DISABLED};

    # Translate each name into its numeric code, in the order given.
    my @numbers;
    foreach my $word (map { split } @_) {
        my $name = lc $word;
        unless (defined $ATTRIBUTES{$name}) {
            require Carp;
            Carp::croak ("Invalid attribute name $name");
        }
        push (@numbers, $ATTRIBUTES{$name});
    }

    my $attribute = join (';', @numbers);
    return ($attribute ne '') ? "\e[${attribute}m" : undef;
}
# Translate escape sequences back into attribute names.  Each argument may be
# given with or without the enclosing "\e[" and "m"; '' and "\e[m" contribute
# no names.  Croaks on a malformed sequence or on a code that has no name.
sub uncolor {
    # First pass: validate every sequence and collect its numeric codes.
    my @numbers;
    foreach my $sequence (@_) {
        my $escape = $sequence;
        $escape =~ s/^\e\[//;
        $escape =~ s/m$//;
        if ($escape =~ /^((?:\d+;)*\d*)$/) {
            push (@numbers, split (/;/, $1));
        } else {
            require Carp;
            Carp::croak ("Bad escape sequence $sequence");
        }
    }

    # Second pass: map every code to its preferred name.
    my @names;
    foreach my $raw (@numbers) {
        my $code = $raw + 0;    # Strip leading zeroes
        my $name = $ATTRIBUTES_R{$code};
        unless (defined $name) {
            require Carp;
            Carp::croak ("No name for escape sequence $code" );
        }
        push (@names, $name);
    }
    return @names;
}
# Return a string wrapped in the escape codes for the given attributes and a
# trailing reset.  Two calling conventions are accepted:
#
#     colored (STRING, ATTRIBUTES...)
#     colored ([ATTRIBUTES...], STRING...)
#
# If $EACHLINE is defined, the string is split on that delimiter and every
# non-empty piece other than the delimiter itself is wrapped separately, so
# that no attribute spans a delimiter.  When ANSI_COLORS_DISABLED is set the
# string is returned unchanged.
sub colored {
    my ($string, @codes);
    if (ref $_[0]) {
        my $attrs = shift;
        @codes  = @$attrs;
        $string = join ('', @_);
    } else {
        ($string, @codes) = @_;
    }
    return $string if defined $ENV{ANSI_COLORS_DISABLED};

    # Simple case: one set of codes around the whole string.
    unless (defined $EACHLINE) {
        return color (@codes) . $string . "\e[0m";
    }

    # Per-line case.  The capturing split keeps the delimiters in the list so
    # they can be passed through uncolored.
    my $attr = color (@codes);
    my @pieces;
    foreach my $chunk (split (/(\Q$EACHLINE\E)/, $string)) {
        next unless length ($chunk) > 0;
        push (@pieces,
              ($chunk ne $EACHLINE) ? $attr . $chunk . "\e[0m" : $chunk);
    }
    return join ('', @pieces);
}
##############################################################################
# Module return value and documentation
##############################################################################
# Ensure we evaluate to true.
1;
__END__
=head1 NAME
Term::ANSIColor - Color screen output using ANSI escape sequences
=for stopwords
cyan colorize namespace runtime TMTOWTDI cmd.exe 4nt.exe command.com NT
ESC Delvare SSH OpenSSH aixterm ECMA-048 Fraktur overlining Zenin
reimplemented Allbery PUSHCOLOR POPCOLOR LOCALCOLOR openmethods.com
=head1 SYNOPSIS
use Term::ANSIColor;
print color 'bold blue';
print "This text is bold blue.\n";
print color 'reset';
print "This text is normal.\n";
print colored ("Yellow on magenta.", 'yellow on_magenta'), "\n";
print "This text is normal.\n";
print colored ['yellow on_magenta'], 'Yellow on magenta.';
print "\n";
use Term::ANSIColor qw(uncolor);
print uncolor '01;31', "\n";
use Term::ANSIColor qw(:constants);
print BOLD, BLUE, "This text is in bold blue.\n", RESET;
use Term::ANSIColor qw(:constants);
{
local $Term::ANSIColor::AUTORESET = 1;
print BOLD BLUE "This text is in bold blue.\n";
print "This text is normal.\n";
}
use Term::ANSIColor qw(:pushpop);
print PUSHCOLOR RED ON_GREEN "This text is red on green.\n";
print PUSHCOLOR BLUE "This text is blue on green.\n";
print RESET BLUE "This text is just blue.\n";
print POPCOLOR "Back to red on green.\n";
print LOCALCOLOR GREEN ON_BLUE "This text is green on blue.\n";
print "This text is red on green.\n";
{
local $Term::ANSIColor::AUTOLOCAL = 1;
print ON_BLUE "This text is red on blue.\n";
print "This text is red on green.\n";
}
print POPCOLOR "Back to whatever we started as.\n";
=head1 DESCRIPTION
This module has two interfaces, one through color() and colored() and the
other through constants. It also offers the utility function uncolor(),
which has to be explicitly imported to be used (see L</SYNOPSIS>).
color() takes any number of strings as arguments and considers them to be
space-separated lists of attributes. It then forms and returns the escape
sequence to set those attributes. It doesn't print it out, just returns
it, so you'll have to print it yourself if you want to (this is so that
you can save it as a string, pass it to something else, send it to a file
handle, or do anything else with it that you might care to).
uncolor() performs the opposite translation, turning escape sequences
into a list of strings.
The recognized non-color attributes are clear, reset, bold, dark, faint,
underline, underscore, blink, reverse, and concealed. Clear and reset
(reset to default attributes), dark and faint (dim and saturated), and
underline and underscore are equivalent, so use whichever is the most
intuitive to you. The recognized foreground color attributes are black,
red, green, yellow, blue, magenta, cyan, and white. The recognized
background color attributes are on_black, on_red, on_green, on_yellow,
on_blue, on_magenta, on_cyan, and on_white. Case is not significant.
Note that not all attributes are supported by all terminal types, and some
terminals may not support any of these sequences. Dark and faint, blink,
and concealed in particular are frequently not implemented.
Attributes, once set, last until they are unset (by sending the attribute
C<clear> or C<reset>). Be careful to do this, or otherwise your attribute
will last after your script is done running, and people get very annoyed
at having their prompt and typing changed to weird colors.
As an aid to help with this, colored() takes a scalar as the first
argument and any number of attribute strings as the second argument and
returns the scalar wrapped in escape codes so that the attributes will be
set as requested before the string and reset to normal after the string.
Alternately, you can pass a reference to an array as the first argument,
and then the contents of that array will be taken as attributes and color
codes and the remainder of the arguments as text to colorize.
Normally, colored() just puts attribute codes at the beginning and end of
the string, but if you set $Term::ANSIColor::EACHLINE to some string, that
string will be considered the line delimiter and the attribute will be set
at the beginning of each line of the passed string and reset at the end of
each line. This is often desirable if the output contains newlines and
you're using background colors, since a background color that persists
across a newline is often interpreted by the terminal as providing the
default background color for the next line. Programs like pagers can also
be confused by attributes that span lines. Normally you'll want to set
$Term::ANSIColor::EACHLINE to C<"\n"> to use this feature.
Alternately, if you import C<:constants>, you can use the constants CLEAR,
RESET, BOLD, DARK, UNDERLINE, UNDERSCORE, BLINK, REVERSE, CONCEALED,
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, ON_BLACK, ON_RED,
ON_GREEN, ON_YELLOW, ON_BLUE, ON_MAGENTA, ON_CYAN, and ON_WHITE directly.
These are the same as color('attribute') and can be used if you prefer
typing:
print BOLD BLUE ON_WHITE "Text", RESET, "\n";
to
print colored ("Text", 'bold blue on_white'), "\n";
(Note that the newline is kept separate to avoid confusing the terminal as
described above since a background color is being used.)
When using the constants, if you don't want to have to remember to add the
C<, RESET> at the end of each print line, you can set
$Term::ANSIColor::AUTORESET to a true value. Then, the display mode will
automatically be reset if there is no comma after the constant. In other
words, with that variable set:
print BOLD BLUE "Text\n";
will reset the display mode afterward, whereas:
print BOLD, BLUE, "Text\n";
will not. If you are using background colors, you will probably want to
print the newline with a separate print statement to avoid confusing the
terminal.
The subroutine interface has the advantage over the constants interface in
that only two subroutines are exported into your namespace, versus
twenty-two in the constants interface. On the flip side, the constants
interface has the advantage of better compile time error checking, since
misspelled names of colors or attributes in calls to color() and colored()
won't be caught until runtime whereas misspelled names of constants will
be caught at compile time. So, pollute your namespace with almost two
dozen subroutines that you may not even use that often, or risk a silly
bug by mistyping an attribute. Your choice, TMTOWTDI after all.
As of Term::ANSIColor 2.0, you can import C<:pushpop> and maintain a stack
of colors using PUSHCOLOR, POPCOLOR, and LOCALCOLOR. PUSHCOLOR takes the
attribute string that starts its argument and pushes it onto a stack of
attributes. POPCOLOR removes the top of the stack and restores the
previous attributes set by the argument of a prior PUSHCOLOR. LOCALCOLOR
surrounds its argument in a PUSHCOLOR and POPCOLOR so that the color
resets afterward.
When using PUSHCOLOR, POPCOLOR, and LOCALCOLOR, it's particularly
important to not put commas between the constants.
print PUSHCOLOR BLUE "Text\n";
will correctly push BLUE onto the top of the stack.
print PUSHCOLOR, BLUE, "Text\n"; # wrong!
will not, and a subsequent pop won't restore the correct attributes.
PUSHCOLOR pushes the attributes set by its argument, which is normally a
string of color constants. It can't ask the terminal what the current
attributes are.
=head1 DIAGNOSTICS
=over 4
=item Bad escape sequence %s
(F) You passed an invalid ANSI escape sequence to uncolor().
=item Bareword "%s" not allowed while "strict subs" in use
(F) You probably mistyped a constant color name such as:
$Foobar = FOOBAR . "This line should be blue\n";
or:
@Foobar = FOOBAR, "This line should be blue\n";
This will only show up under use strict (another good reason to run under
use strict).
=item Invalid attribute name %s
(F) You passed an invalid attribute name to either color() or colored().
=item Name "%s" used only once: possible typo
(W) You probably mistyped a constant color name such as:
print FOOBAR "This text is color FOOBAR\n";
It's probably better to always use commas after constant names in order to
force the next error.
=item No comma allowed after filehandle
(F) You probably mistyped a constant color name such as:
print FOOBAR, "This text is color FOOBAR\n";
Generating this fatal compile error is one of the main advantages of using
the constants interface, since you'll immediately know if you mistype a
color name.
=item No name for escape sequence %s
(F) The ANSI escape sequence passed to uncolor() contains escapes which
aren't recognized and can't be translated to names.
=back
=head1 ENVIRONMENT
=over 4
=item ANSI_COLORS_DISABLED
If this environment variable is set, all of the functions defined by this
module (color(), colored(), and all of the constants not previously used
in the program) will not output any escape sequences and instead will just
return the empty string or pass through the original text as appropriate.
This is intended to support easy use of scripts using this module on
platforms that don't support ANSI escape sequences.
For it to have its proper effect, this environment variable must be set
before any color constants are used in the program.
=back
=head1 RESTRICTIONS
It would be nice if one could leave off the commas around the constants
entirely and just say:
print BOLD BLUE ON_WHITE "Text\n" RESET;
but the syntax of Perl doesn't allow this. You need a comma after the
string. (Of course, you may consider it a bug that commas between all the
constants aren't required, in which case you may feel free to insert
commas unless you're using $Term::ANSIColor::AUTORESET or
PUSHCOLOR/POPCOLOR.)
For easier debugging, you may prefer to always use the commas when not
setting $Term::ANSIColor::AUTORESET or PUSHCOLOR/POPCOLOR so that you'll
get a fatal compile error rather than a warning.
=head1 NOTES
The codes generated by this module are standard terminal control codes,
complying with ECMA-048 and ISO 6429 (generally referred to as "ANSI
color" for the color codes). The non-color control codes (bold, dark,
italic, underline, and reverse) are part of the earlier ANSI X3.64
standard for control sequences for video terminals and peripherals.
Note that not all displays are ISO 6429-compliant, or even X3.64-compliant
(or are even attempting to be so). This module will not work as expected
on displays that do not honor these escape sequences, such as cmd.exe,
4nt.exe, and command.com under either Windows NT or Windows 2000. They
may just be ignored, or they may display as an ESC character followed by
some apparent garbage.
Jean Delvare provided the following table of different common terminal
emulators and their support for the various attributes and others have
helped me flesh it out:
clear bold faint under blink reverse conceal
------------------------------------------------------------------------
xterm yes yes no yes bold yes yes
linux yes yes yes bold yes yes no
rxvt yes yes no yes bold/black yes no
dtterm yes yes yes yes reverse yes yes
teraterm yes reverse no yes rev/red yes no
aixterm kinda normal no yes no yes yes
PuTTY yes color no yes no yes no
Windows yes no no no no yes no
Cygwin SSH yes yes no color color color yes
Mac Terminal yes yes no yes yes yes yes
Windows is Windows telnet, Cygwin SSH is the OpenSSH implementation under
Cygwin on Windows NT, and Mac Terminal is the Terminal application in Mac
OS X. Where the entry is other than yes or no, that emulator displays the
given attribute as something else instead. Note that on an aixterm, clear
doesn't reset colors; you have to explicitly set the colors back to what
you want. More entries in this table are welcome.
Note that codes 3 (italic), 6 (rapid blink), and 9 (strike-through) are
specified in ANSI X3.64 and ECMA-048 but are not commonly supported by
most displays and emulators and therefore aren't supported by this module
at the present time. ECMA-048 also specifies a large number of other
attributes, including a sequence of attributes for font changes, Fraktur
characters, double-underlining, framing, circling, and overlining. As
none of these attributes are widely supported or useful, they also aren't
currently supported by this module.
=head1 SEE ALSO
ECMA-048 is available on-line (at least at the time of this writing) at
L<http://www.ecma-international.org/publications/standards/ECMA-048.HTM>.
ISO 6429 is available from ISO for a charge; the author of this module
does not own a copy of it. Since the source material for ISO 6429 was
ECMA-048 and the latter is available for free, there seems little reason
to obtain the ISO standard.
The current version of this module is always available from its web site
at L<http://www.eyrie.org/~eagle/software/ansicolor/>. It is also part of
the Perl core distribution as of 5.6.0.
=head1 AUTHORS
Original idea (using constants) by Zenin, reimplemented using subs by Russ
Allbery <rra@stanford.edu>, and then combined with the original idea by
Russ with input from Zenin. Russ Allbery now maintains this module.
=head1 COPYRIGHT AND LICENSE
Copyright 1996, 1997, 1998, 2000, 2001, 2002, 2005, 2006, 2008, 2009 Russ
Allbery <rra@stanford.edu> and Zenin. This program is free software; you
may redistribute it and/or modify it under the same terms as Perl itself.
PUSHCOLOR, POPCOLOR, and LOCALCOLOR were contributed by openmethods.com
voice solutions.
=cut
package Term::Cap;
# Since the debugger uses Term::ReadLine which uses Term::Cap, we want
# to load as few modules as possible. This includes Carp.pm.
# Lazy stand-in for Carp::carp.  Carp is loaded only when a warning is
# actually issued; goto &sub then hands the current @_ to Carp::carp and
# replaces this wrapper's call frame.
sub carp
{
require Carp;
goto &Carp::carp;
}
# Lazy stand-in for Carp::croak.  Carp is loaded only when an error is
# actually raised; goto &sub then hands the current @_ to Carp::croak and
# replaces this wrapper's call frame.
sub croak
{
require Carp;
goto &Carp::croak;
}
use strict;
use vars qw($VERSION $VMS_TERMCAP);
use vars qw($termpat $state $first $entry);
$VERSION = '1.12';
# Version undef: Thu Dec 14 20:02:42 CST 1995 by sanders@bsdi.com
# Version 1.00: Thu Nov 30 23:34:29 EST 2000 by schwern@pobox.com
# [PATCH] $VERSION crusade, strict, tests, etc... all over lib/
# Version 1.01: Wed May 23 00:00:00 CST 2001 by d-lewart@uiuc.edu
# Avoid warnings in Tgetent and Tputs
# Version 1.02: Sat Nov 17 13:50:39 GMT 2001 by jns@gellyfish.com
# Altered layout of the POD
# Added Test::More to PREREQ_PM in Makefile.PL
# Fixed no argument Tgetent()
# Version 1.03: Wed Nov 28 10:09:38 GMT 2001
# VMS Support from Charles Lane <lane@DUPHY4.Physics.Drexel.Edu>
# Version 1.04: Thu Nov 29 16:22:03 GMT 2001
# Fixed warnings in test
# Version 1.05: Mon Dec 3 15:33:49 GMT 2001
# Don't try to fall back on infocmp if it's not there. From chromatic.
# Version 1.06: Thu Dec 6 18:43:22 GMT 2001
# Preload the default VMS termcap from Charles Lane
# Don't carp at setting OSPEED unless warnings are on.
# Version 1.07: Wed Jan 2 21:35:09 GMT 2002
# Sanity check on infocmp output from Norton Allen
# Repaired INSTALLDIRS thanks to Michael Schwern
# Version 1.08: Sat Sep 28 11:33:15 BST 2002
# Late loading of 'Carp' as per Michael Schwern
# Version 1.09: Tue Apr 20 12:06:51 BST 2004
# Merged in changes from and to Core
# Core (Fri Aug 30 14:15:55 CEST 2002):
# Cope with comments lines from 'infocmp' from Brendan O'Dea
# Allow for EBCDIC in Tgoto magic test.
# Version 1.10: Thu Oct 18 16:52:20 BST 2007
# Don't try to use $ENV{HOME} if it doesn't exist
# Give Win32 'dumb' if TERM isn't set
# Provide fallback 'dumb' termcap entry as last resort
# Version 1.11: Thu Oct 25 09:33:07 BST 2007
# EBCDIC fixes from Chun Bing Ge <gecb@cn.ibm.com>
# Version 1.12: Sat Dec 8 00:10:21 GMT 2007
# QNX test fix from Matt Kraai <kraai@ftbfs.org>
#
# TODO:
# support Berkeley DB termcaps
# force $FH into callers package?
# keep $FH in object at Tgetent time?
=head1 NAME
Term::Cap - Perl termcap interface
=head1 SYNOPSIS
require Term::Cap;
$terminal = Tgetent Term::Cap { TERM => undef, OSPEED => $ospeed };
$terminal->Trequire(qw/ce ku kd/);
$terminal->Tgoto('cm', $col, $row, $FH);
$terminal->Tputs('dl', $count, $FH);
$terminal->Tpad($string, $count, $FH);
=head1 DESCRIPTION
These are low-level functions to extract and use capabilities from
a terminal capability (termcap) database.
More information on the terminal capabilities will be found in the
termcap manpage on most Unix-like systems.
=head2 METHODS
=over 4
The output strings for B<Tputs> are cached for counts of 1 for performance.
B<Tgoto> and B<Tpad> do not cache. C<$self-E<gt>{_xx}> is the raw termcap
data and C<$self-E<gt>{xx}> is the cached version.
print $terminal->Tpad($self->{_xx}, 1);
B<Tgoto>, B<Tputs>, and B<Tpad> return the string and will also
output the string to $FH if specified.
=cut
# Preload the default VMS termcap.
# If a different termcap is required then the text of one can be supplied
# in $Term::Cap::VMS_TERMCAP before Tgetent is called.
#
# Runs on VMS only: every line of the DATA filehandle is read, its line
# ending is removed, and the lines are joined into one string.
if ( $^O eq 'VMS' )
{
chomp( my @entry = <DATA> );
$VMS_TERMCAP = join '', @entry;
}
# Returns the list of termcap files to check, in search order, keeping only
# those that exist as plain files.
#
# Candidates are:
#   * $ENV{TERMCAP}, when it looks like an absolute path (a drive letter path
#     on os2/MSWin32/dos, a leading slash elsewhere);
#   * every entry of $ENV{TERMPATH} (separated by colons or whitespace) when
#     that variable is set and true;
#   * otherwise $HOME/.termcap (only if HOME is set), /etc/termcap and
#     /usr/share/misc/termcap.
sub termcap_path
{    ## private
    my @termcap_path;

    # $TERMCAP, if it's a filespec
    push( @termcap_path, $ENV{TERMCAP} )
      if (
        ( exists $ENV{TERMCAP} )
        && (
            ( $^O eq 'os2' || $^O eq 'MSWin32' || $^O eq 'dos' )
            ? $ENV{TERMCAP} =~ /^[a-z]:[\\\/]/is
            : $ENV{TERMCAP} =~ /^\//s
        )
      );

    if ( ( exists $ENV{TERMPATH} ) && ( $ENV{TERMPATH} ) )
    {
        # Add the users $TERMPATH.  The separator pattern must not contain a
        # capturing group: split would then return the separators as well,
        # and a file named ":" in the current directory would be reported as
        # a termcap file.
        push( @termcap_path, split( /:|\s+/, $ENV{TERMPATH} ) );
    }
    else
    {
        # Defaults
        push( @termcap_path,
            exists $ENV{'HOME'} ? $ENV{'HOME'} . '/.termcap' : undef,
            '/etc/termcap', '/usr/share/misc/termcap', );
    }

    # return the list of those termcaps that exist
    return grep { defined $_ && -f $_ } @termcap_path;
}
=item B<Tgetent>
Returns a blessed object reference which the user can
then use to send the control strings to the terminal using B<Tputs>
and B<Tgoto>.
The function extracts the entry of the specified terminal
type I<TERM> (defaults to the environment variable I<TERM>) from the
database.
It will look in the environment for a I<TERMCAP> variable. If
found, and the value does not begin with a slash, and the terminal
type name is the same as the environment string I<TERM>, the
I<TERMCAP> string is used instead of reading a termcap file. If
it does begin with a slash, the string is used as a path name of
the termcap file to search. If I<TERMCAP> does not begin with a
slash and name is different from I<TERM>, B<Tgetent> searches the
files F<$HOME/.termcap>, F</etc/termcap>, and F</usr/share/misc/termcap>,
in that order, unless the environment variable I<TERMPATH> exists,
in which case it specifies a list of file pathnames (separated by
spaces or colons) to be searched B<instead>. Whenever multiple
files are searched and a tc field occurs in the requested entry,
the entry it names must be found in the same file or one of the
succeeding files. If there is a C<:tc=...:> in the I<TERMCAP>
environment variable string it will continue the search in the
files as above.
The extracted termcap entry is available in the object
as C<$self-E<gt>{TERMCAP}>.
It takes a hash reference as an argument with two optional keys:
=over 2
=item OSPEED
The terminal output bit rate (often mistakenly called the baud rate)
for this terminal - if not set a warning will be generated
and it will be defaulted to 9600. I<OSPEED> can be specified as
either a POSIX termios/SYSV termio speed (where 9600 equals 9600) or
an old BSD-style speed (where 13 equals 9600).
=item TERM
The terminal type whose termcap entry will be used - if not supplied it will
default to $ENV{TERM}: if that is not set then B<Tgetent> will croak.
=back
It calls C<croak> on failure.
=cut
# Constructor: Term::Cap->Tgetent( { TERM => ..., OSPEED => ... } ).
#
# Blesses the supplied hash reference (or a new hash) into $class, works out
# the padding factor from OSPEED, locates the termcap entry for TERM
# (following :tc=...: continuation references), stores the raw entry in
# $self->{TERMCAP} and each decoded capability in $self->{"_xx"}.
#
# Croaks when TERM cannot be determined, when no termcap source is available,
# or when the entry (or one of its :tc= continuations) cannot be found.
sub Tgetent
{    ## public -- static method
    my $class = shift;
    my ($self) = @_;

    $self = {} unless defined $self;
    bless $self, $class;

    my ( $term, $cap, $search, $field, $max, $tmp_term, $TERMCAP );

    # These are package variables because the string-eval'ed search code
    # below refers to them by name.
    local ( $termpat, $state, $first, $entry );    # used inside eval
    local $_;

    # Compute PADDING factor from OSPEED (to be used by Tpad)
    if ( !$self->{OSPEED} )
    {
        if ($^W)
        {
            carp "OSPEED was not set, defaulting to 9600";
        }
        $self->{OSPEED} = 9600;
    }
    if ( $self->{OSPEED} < 16 )
    {
        # delays for old style speeds
        my @pad = (
            0,    200, 133.3, 90.9, 74.3, 66.7, 50, 33.3,
            16.7, 8.3, 5.5,   4.1,  2,    1,    .5, .2
        );
        $self->{PADDING} = $pad[ $self->{OSPEED} ];
    }
    else
    {
        $self->{PADDING} = 10000 / $self->{OSPEED};
    }

    unless ( $self->{TERM} )
    {
        if ( $ENV{TERM} )
        {
            $self->{TERM} = $ENV{TERM};
        }
        else
        {
            # Perl reports native Windows as 'MSWin32'; comparing against
            # 'Win32' could never match, so the 'dumb' fallback was dead code.
            if ( $^O eq 'MSWin32' )
            {
                $self->{TERM} = 'dumb';
            }
            else
            {
                croak "TERM not set";
            }
        }
    }

    $term = $self->{TERM};    # $term is the term type we are looking for

    # $tmp_term is always the next term (possibly :tc=...:) we are looking for
    $tmp_term = $self->{TERM};

    # protect any pattern metacharacters in $tmp_term
    $termpat = $tmp_term;
    $termpat =~ s/(\W)/\\$1/g;

    my $foo = ( exists $ENV{TERMCAP} ? $ENV{TERMCAP} : '' );

    # $entry is the extracted termcap entry.  A TERMCAP value that is not a
    # path and names the wanted terminal is used directly as the entry.
    if ( ( $foo !~ m:^/:s ) && ( $foo =~ m/(^|\|)${termpat}[:|]/s ) )
    {
        $entry = $foo;
    }

    my @termcap_path = termcap_path();

    unless ( @termcap_path || $entry )
    {
        # last resort--fake up a termcap from terminfo
        local $ENV{TERM} = $term;

        if ( $^O eq 'VMS' )
        {
            $entry = $VMS_TERMCAP;
        }
        else
        {
            if ( grep { -x "$_/infocmp" } split /:/, $ENV{PATH} )
            {
                eval {
                    my $tmp = `infocmp -C 2>/dev/null`;
                    $tmp =~ s/^#.*\n//gm;    # remove comments
                    if (   ( $tmp !~ m%^/%s )
                        && ( $tmp =~ /(^|\|)${termpat}[:|]/s ) )
                    {
                        $entry = $tmp;
                    }
                };
            }
            else
            {
                # this is getting desperate now
                if ( $self->{TERM} eq 'dumb' )
                {
                    # Must stay on one line: a line break inside this literal
                    # would end up inside the do= capability.
                    $entry =
'dumb|80-column dumb tty::am::co#80::bl=^G:cr=^M:do=^J:sf=^J:';
                }
            }
        }
    }

    croak "Can't find a valid termcap file" unless @termcap_path || $entry;

    $state = 1;    # 0 == finished
                   # 1 == next file
                   # 2 == search again

    $first = 0;    # first entry (keeps term name)

    $max = 32;     # max :tc=...:'s

    if ($entry)
    {
        # ok, we're starting with $TERMCAP
        $first++;    # we're the first entry
                     # do we need to continue?
        if ( $entry =~ s/:tc=([^:]+):/:/ )
        {
            $tmp_term = $1;

            # protect any pattern metacharacters in $tmp_term
            $termpat = $tmp_term;
            $termpat =~ s/(\W)/\\$1/g;
        }
        else
        {
            $state = 0;    # we're already finished
        }
    }

    # This is eval'ed inside the while loop for each file
    $search = q{
        while (<TERMCAP>) {
            next if /^\\t/ || /^#/;
            if ($_ =~ m/(^|\\|)${termpat}[:|]/o) {
                chomp;
                s/^[^:]*:// if $first++;
                $state = 0;
                while ($_ =~ s/\\\\$//) {
                    defined(my $x = <TERMCAP>) or last;
                    $_ .= $x; chomp;
                }
                last;
            }
        }
        defined $entry or $entry = '';
        $entry .= $_ if $_;
    };

    while ( $state != 0 )
    {
        if ( $state == 1 )
        {
            # get the next TERMCAP
            $TERMCAP = shift @termcap_path
              || croak "failed termcap lookup on $tmp_term";
        }
        else
        {
            # do the same file again
            # prevent endless recursion
            $max-- || croak "failed termcap loop at $tmp_term";
            $state = 1;    # ok, maybe do a new file next time
        }

        open( TERMCAP, "< $TERMCAP\0" ) || croak "open $TERMCAP: $!";
        eval $search;
        die $@ if $@;
        close TERMCAP;

        # If :tc=...: found then search this file again
        $entry =~ s/:tc=([^:]+):/:/ && ( $tmp_term = $1, $state = 2 );

        # protect any pattern metacharacters in $tmp_term
        $termpat = $tmp_term;
        $termpat =~ s/(\W)/\\$1/g;
    }

    croak "Can't find $term" if $entry eq '';
    $entry =~ s/:+\s*:+/:/g;    # cleanup $entry
    $entry =~ s/:+/:/g;         # cleanup $entry
    $self->{TERMCAP} = $entry;  # save it

    # print STDERR "DEBUG: $entry = ", $entry, "\n";

    # Precompile $entry into the object.  The first definition of a
    # capability wins; later ones (for example from :tc= entries) are ignored.
    $entry =~ s/^[^:]*://;
    foreach $field ( split( /:[\s:\\]*/, $entry ) )
    {
        if ( defined $field && $field =~ /^(\w\w)$/ )
        {
            # boolean flag
            $self->{ '_' . $field } = 1 unless defined $self->{ '_' . $1 };

            # print STDERR "DEBUG: flag $1\n";
        }
        elsif ( defined $field && $field =~ /^(\w\w)\@/ )
        {
            # explicitly cancelled capability
            $self->{ '_' . $1 } = "";

            # print STDERR "DEBUG: unset $1\n";
        }
        elsif ( defined $field && $field =~ /^(\w\w)#(.*)/ )
        {
            # numeric capability
            $self->{ '_' . $1 } = $2 unless defined $self->{ '_' . $1 };

            # print STDERR "DEBUG: numeric $1 = $2\n";
        }
        elsif ( defined $field && $field =~ /^(\w\w)=(.*)/ )
        {
            # string capability: decode the termcap escapes

            # print STDERR "DEBUG: string $1 = $2\n";
            next if defined $self->{ '_' . ( $cap = $1 ) };
            $_ = $2;
            if ( ord('A') == 193 )
            {
                # EBCDIC platform
                s/\\E/\047/g;
                s/\\(\d\d\d)/pack('c',oct($1) & 0177)/eg;
                s/\\n/\n/g;
                s/\\r/\r/g;
                s/\\t/\t/g;
                s/\\b/\b/g;
                s/\\f/\f/g;
                s/\\\^/\337/g;
                s/\^\?/\007/g;
                s/\^(.)/pack('c',ord($1) & 31)/eg;
                s/\\(.)/$1/g;
                s/\337/^/g;
            }
            else
            {
                s/\\E/\033/g;
                s/\\(\d\d\d)/pack('c',oct($1) & 0177)/eg;
                s/\\n/\n/g;
                s/\\r/\r/g;
                s/\\t/\t/g;
                s/\\b/\b/g;
                s/\\f/\f/g;
                s/\\\^/\377/g;
                s/\^\?/\177/g;
                s/\^(.)/pack('c',ord($1) & 31)/eg;
                s/\\(.)/$1/g;
                s/\377/^/g;
            }
            $self->{ '_' . $cap } = $_;
        }

        # else { carp "junk in $term ignored: $field"; }
    }

    # Defaults for the pad character and the backspace string.
    $self->{'_pc'} = "\0" unless defined $self->{'_pc'};
    $self->{'_bc'} = "\b" unless defined $self->{'_bc'};
    $self;
}
# $terminal->Tpad($string, $cnt, $FH);
=item B<Tpad>
Outputs a literal string with appropriate padding for the current terminal.
It takes three arguments:
=over 2
=item B<$string>
The literal string to be output. If it starts with a number and an optional
'*' then the padding will be increased by an amount relative to this number;
if the '*' is present then this amount will be multiplied by $cnt. This part
of $string is removed before output.
=item B<$cnt>
Will be used to modify the padding applied to string as described above.
=item B<$FH>
An optional filehandle (or IO::Handle ) that output will be printed to.
=back
The padded $string is returned.
=cut
# Tpad( $self, $string, $cnt, $FH )
#
# Strips a leading padding specification (a number, optionally followed by
# '*') from $string and appends the matching number of pad characters
# ($self->{'_pc'}).  With '*', the number is multiplied by $cnt.  No pad
# characters are added when $self->{PADDING} is 0.1 or less.  The result is
# printed to $FH if one is given, and returned.
sub Tpad
{    ## public
    my $self = shift;
    my ( $string, $cnt, $FH ) = @_;

    # /s and \z make the final capture take the whole remainder of the
    # capability.  Without them a trailing newline (e.g. from "5^J") was
    # silently dropped, and a string with an embedded newline never matched,
    # so its padding prefix was emitted literally.
    if ( defined $string && $string =~ /\A([\d.]+)(\*?)(.*)\z/s )
    {
        my $ms = $1;
        $ms *= $cnt if $2;
        $string = $3;
        my $decr = $self->{PADDING};
        if ( $decr > .1 )
        {
            # Adding half a step before dividing rounds to the nearest
            # whole number of pad characters.
            $ms += $decr / 2;
            $string .= $self->{'_pc'} x ( $ms / $decr );
        }
    }
    print $FH $string if $FH;
    $string;
}
# $terminal->Tputs($cap, $cnt, $FH);
=item B<Tputs>
Output the string for the given capability padded as appropriate without
any parameter substitution.
It takes three arguments:
=over 2
=item B<$cap>
The capability whose string is to be output.
=item B<$cnt>
A count passed to Tpad to modify the padding applied to the output string.
If $cnt is zero or one then the resulting string will be cached.
=item B<$FH>
An optional filehandle (or IO::Handle ) that output will be printed to.
=back
The appropriate string for the capability will be returned.
=cut
# Tputs( $self, $cap, $cnt, $FH )
#
# Returns the padded string for capability $cap without any parameter
# substitution, printing it to $FH as well if one is given.  For a count
# greater than one the string is padded afresh on every call; otherwise the
# result of padding with a count of 1 is computed once and cached in
# $self->{$cap} (undef when the capability does not exist).
sub Tputs
{    ## public
    my ( $self, $cap, $cnt, $FH ) = @_;
    my $raw_key = '_' . $cap;
    my $string;

    if ( $cnt && $cnt > 1 )
    {
        $string = Tpad( $self, $self->{$raw_key}, $cnt );
    }
    else
    {
        # cache result because Tpad can be slow
        if ( !exists $self->{$cap} )
        {
            if ( exists $self->{$raw_key} )
            {
                $self->{$cap} = Tpad( $self, $self->{$raw_key}, 1 );
            }
            else
            {
                $self->{$cap} = undef;
            }
        }
        $string = $self->{$cap};
    }

    print $FH $string if $FH;
    return $string;
}
# $terminal->Tgoto($cap, $col, $row, $FH);
=item B<Tgoto>
B<Tgoto> decodes a cursor addressing string with the given parameters.
There are four arguments:
=over 2
=item B<$cap>
The name of the capability to be output.
=item B<$col>
The first value to be substituted in the output string ( usually the column
in a cursor addressing capability )
=item B<$row>
The second value to be substituted in the output string (usually the row
in cursor addressing capabilities)
=item B<$FH>
An optional filehandle (or IO::Handle ) to which the output string will be
printed.
=back
Substitutions are made with $col and $row in the output string with the
following sprintf() line formats:
%% output `%'
%d output value as in printf %d
%2 output value as in printf %2d
%3 output value as in printf %3d
%. output value as in printf %c
%+x add x to value, then do %.
%>xy if value > x then add y, no output
%r reverse order of two parameters, no output
%i increment by one, no output
%B BCD (16*(value/10)) + (value%10), no output
%n exclusive-or all parameters with 0140 (Datamedia 2500)
%D Reverse coding (value - 2*(value%16)), no output (Delta Data)
The output string will be returned.
=cut
# Tgoto( $self, $cap, $col, $row, $FH )
#
# Expands the % escapes in capability $cap using the two parameters, pads the
# result with Tpad (using the column value as the count), prints it to $FH if
# one is given, and returns it.
#
# The parameters are consumed in the order (row, column) unless %r swaps
# them.  Implemented escapes: %%, %d, %., %+x, %r, %>xy, %2, %3 and %i.  Any
# other escape makes the function return the literal string "OOPS".
sub Tgoto
{    ## public
    my $self = shift;
    my ( $cap, $code, $tmp, $FH ) = @_;
    my $string = $self->{ '_' . $cap };
    my $result = '';
    my $after  = '';
    my $online = 0;
    my @tmp    = ( $tmp, $code );
    my $cnt    = $code;

    # /s lets the trailing (.*) keep everything after the escape; without it
    # any text following an embedded newline was discarded.
    while ( $string =~ /^([^%]*)%(.)(.*)/s )
    {
        $result .= $1;
        $code   = $2;
        $string = $3;
        if ( $code eq 'd' )
        {
            $result .= sprintf( "%d", shift(@tmp) );
        }
        elsif ( $code eq '%' )
        {
            # A doubled percent sign stands for a literal one.
            $result .= '%';
        }
        elsif ( $code eq '.' )
        {
            $tmp = shift(@tmp);

            # Values 0, 4 and 10 are not sent as raw characters: send the
            # next higher value and append a compensating "up" or "backspace"
            # string after the sequence.
            if ( $tmp == 0 || $tmp == 4 || $tmp == 10 )
            {
                if ($online)
                {
                    ++$tmp, $after .= $self->{'_up'} if $self->{'_up'};
                }
                else
                {
                    ++$tmp, $after .= $self->{'_bc'};
                }
            }
            $result .= sprintf( "%c", $tmp );
            $online = !$online;
        }
        elsif ( $code eq '+' )
        {
            # The character following %+ is the offset to add.
            $result .= sprintf( "%c", shift(@tmp) + ord($string) );
            $string = substr( $string, 1, 99 );
            $online = !$online;
        }
        elsif ( $code eq 'r' )
        {
            ( $code, $tmp ) = @tmp;
            @tmp = ( $tmp, $code );
            $online = !$online;
        }
        elsif ( $code eq '>' )
        {
            # The next two characters are the threshold and the increment.
            # Index 0 is used directly instead of the obsolete $[ variable.
            ( $code, $tmp, $string ) = unpack( "CCa99", $string );
            if ( $tmp[0] > $code )
            {
                $tmp[0] += $tmp;
            }
        }
        elsif ( $code eq '2' )
        {
            $result .= sprintf( "%02d", shift(@tmp) );
            $online = !$online;
        }
        elsif ( $code eq '3' )
        {
            $result .= sprintf( "%03d", shift(@tmp) );
            $online = !$online;
        }
        elsif ( $code eq 'i' )
        {
            ( $code, $tmp ) = @tmp;
            @tmp = ( $code + 1, $tmp + 1 );
        }
        else
        {
            return "OOPS";
        }
    }
    $string = Tpad( $self, $result . $string . $after, $cnt );
    print $FH $string if $FH;
    $string;
}
# $terminal->Trequire(qw/ce ku kd/);
=item B<Trequire>
Takes a list of capabilities as an argument and will croak if one is not
found.
=cut
# Trequire: croak unless every named capability is present in the object.
#
#   $terminal->Trequire(qw/ce ku kd/);
#
# A capability counts as missing when $self->{"_$name"} is undefined or
# false.  All missing names are reported in a single croak message.
sub Trequire
{    ## public
    my ( $self, @wanted ) = @_;

    my @undefined = grep {
        my $value = $self->{ '_' . $_ };
        !( defined $value && $value );
    } @wanted;

    croak "Terminal does not support: (@undefined)" if @undefined;
}
=back
=head1 EXAMPLES
use Term::Cap;
# Get terminal output speed
require POSIX;
my $termios = new POSIX::Termios;
$termios->getattr;
my $ospeed = $termios->getospeed;
# Old-style ioctl code to get ospeed:
# require 'ioctl.pl';
# ioctl(TTY,$TIOCGETP,$sgtty);
# ($ispeed,$ospeed) = unpack('cc',$sgtty);
# allocate and initialize a terminal structure
$terminal = Tgetent Term::Cap { TERM => undef, OSPEED => $ospeed };
# require certain capabilities to be available
$terminal->Trequire(qw/ce ku kd/);
# Output Routines, if $FH is undefined these just return the string
# Tgoto does the % expansion stuff with the given args
$terminal->Tgoto('cm', $col, $row, $FH);
# Tputs doesn't do any % expansion.
$terminal->Tputs('dl', $count = 1, $FH);
=head1 COPYRIGHT AND LICENSE
Please see the README file in distribution.
=head1 AUTHOR
This module is part of the core Perl distribution and is also maintained
for CPAN by Jonathan Stowe <jns@gellyfish.com>.
=head1 SEE ALSO
termcap(5)
=cut
# Below is a default entry for systems where there are terminals but no
# termcap
1;
__DATA__
vt220|vt200|DEC VT220 in vt100 emulation mode:
am:mi:xn:xo:
co#80:li#24:
RA=\E[?7l:SA=\E[?7h:
ac=kkllmmjjnnwwqquuttvvxx:ae=\E(B:al=\E[L:as=\E(0:
bl=^G:cd=\E[J:ce=\E[K:cl=\E[H\E[2J:cm=\E[%i%d;%dH:
cr=^M:cs=\E[%i%d;%dr:dc=\E[P:dl=\E[M:do=\E[B:
ei=\E[4l:ho=\E[H:im=\E[4h:
is=\E[1;24r\E[24;1H:
nd=\E[C:
kd=\E[B::kl=\E[D:kr=\E[C:ku=\E[A:le=^H:
mb=\E[5m:md=\E[1m:me=\E[m:mr=\E[7m:
kb=\0177:
r2=\E>\E[24;1H\E[?3l\E[?4l\E[?5l\E[?7h\E[?8h\E=:rc=\E8:
sc=\E7:se=\E[27m:sf=\ED:so=\E[7m:sr=\EM:ta=^I:
ue=\E[24m:up=\E[A:us=\E[4m:ve=\E[?25h:vi=\E[?25l:
package Term::Complete;
require 5.000;
require Exporter;
use strict;
our @ISA = qw(Exporter);
our @EXPORT = qw(Complete);
our $VERSION = '1.402';
# @(#)complete.pl,v1.2 (me@anywhere.EBay.Sun.COM) 09/23/91
=head1 NAME
Term::Complete - Perl word completion module
=head1 SYNOPSIS
$input = Complete('prompt_string', \@completion_list);
$input = Complete('prompt_string', @completion_list);
=head1 DESCRIPTION
This routine provides word completion on the list of words in
the array (or array ref).
The tty driver is put into raw mode and restored using an operating
system specific command, in UNIX-like environments C<stty>.
The following command characters are defined:
=over 4
=item E<lt>tabE<gt>
Attempts word completion.
Cannot be changed.
=item ^D
Prints completion list.
Defined by I<$Term::Complete::complete>.
=item ^U
Erases the current input.
Defined by I<$Term::Complete::kill>.
=item E<lt>delE<gt>, E<lt>bsE<gt>
Erases one character.
Defined by I<$Term::Complete::erase1> and I<$Term::Complete::erase2>.
=back
=head1 DIAGNOSTICS
Bell sounds when word completion fails.
=head1 BUGS
The completion character E<lt>tabE<gt> cannot be changed.
=head1 AUTHOR
Wayne Thompson
=cut
# Package-wide settings for Complete().  The control characters may be
# overridden by callers through $Term::Complete::complete, ::kill,
# ::erase1 and ::erase2.
our($complete, $kill, $erase1, $erase2, $tty_raw_noecho, $tty_restore, $stty,
    $tty_safe_restore);

# '' means "state not captured yet"; Complete() sets it to undef when
# `stty -g` turns out to be unsupported.
our($tty_saved_state) = '';

CONFIG: {
    $complete = "\004";    # ^D
    $kill     = "\025";    # ^U
    $erase1   = "\177";    # DEL
    $erase2   = "\010";    # BS

    # Use the first executable stty found to build the raw/restore commands.
    foreach my $s (qw(/bin/stty /usr/bin/stty)) {
        if (-x $s) {
            $tty_raw_noecho   = "$s raw -echo";
            $tty_restore      = "$s -raw echo";
            $tty_safe_restore = $tty_restore;
            $stty             = $s;
            last;
        }
    }
}
# Complete: prompt on STDOUT and read a line from STDIN one character at a
# time, offering word completion against a list of candidates.
#
#   $input = Complete('prompt_string', \@completion_list);
#   $input = Complete('prompt_string', @completion_list);
#
# Returns the text entered.  Input ends at a carriage return or at end of
# file on STDIN.  When an stty command was found by the CONFIG block the
# terminal is switched to raw/no-echo mode for the duration of the call and
# restored afterwards.
sub Complete {
    my ($prompt, @cmp_lst, $cmp, $test, $l, @match);
    my ($return, $r) = ("", 0);    # text typed so far and its length

    $prompt = shift;
    if (ref $_[0] || $_[0] =~ /^\*/) {
        @cmp_lst = sort @{$_[0]};
    }
    else {
        @cmp_lst = sort(@_);
    }

    # Attempt to save the current stty state, to be restored later
    if (defined $stty && defined $tty_saved_state && $tty_saved_state eq '') {
        $tty_saved_state = qx($stty -g 2>/dev/null);
        if ($?) {
            # stty -g not supported
            $tty_saved_state = undef;
        }
        else {
            $tty_saved_state =~ s/\s+$//g;
            $tty_restore = qq($stty "$tty_saved_state" 2>/dev/null);
        }
    }

    system $tty_raw_noecho if defined $tty_raw_noecho;

    LOOP: {
        local $_;
        print($prompt, $return);

        # getc returns undef at end of file; stop there instead of
        # spinning forever on a closed or exhausted STDIN.
        while (defined($_ = getc(STDIN)) && $_ ne "\r") {
            CASE: {
                # (TAB) attempt completion
                $_ eq "\t" && do {
                    @match = grep(/^\Q$return/, @cmp_lst);
                    unless ($#match < 0) {
                        # Shrink $l to the longest prefix shared by all
                        # matching candidates.
                        $l = length($test = shift(@match));
                        foreach $cmp (@match) {
                            until (substr($cmp, 0, $l) eq substr($test, 0, $l)) {
                                $l--;
                            }
                        }
                        # NOTE(review): the bell is emitted whenever at
                        # least one candidate matches, whereas the POD says
                        # it sounds when completion fails -- confirm intent.
                        print("\a");
                        print($test = substr($test, $r, $l - $r));
                        $r = length($return .= $test);
                    }
                    last CASE;
                };

                # (^D) completion list
                $_ eq $complete && do {
                    print(join("\r\n", '', grep(/^\Q$return/, @cmp_lst)), "\r\n");
                    redo LOOP;
                };

                # (^U) kill
                $_ eq $kill && do {
                    if ($r) {
                        $r      = 0;
                        $return = "";
                        print("\r\n");
                        redo LOOP;
                    }
                    last CASE;
                };

                # (DEL) || (BS) erase
                ($_ eq $erase1 || $_ eq $erase2) && do {
                    if ($r) {
                        print("\b \b");
                        chop($return);
                        $r--;
                    }
                    last CASE;
                };

                # printable char
                ord >= 32 && do {
                    $return .= $_;
                    $r++;
                    print;
                    last CASE;
                };
            }
        }
    }

    # Always undo raw mode.  When `stty -g` was unsupported $tty_restore
    # still holds the generic "-raw echo" command, so the terminal is not
    # left raw/no-echo in that case either.
    if (defined $tty_restore) {
        system $tty_restore;
        if ($? && defined $tty_safe_restore) {
            # tty_restore caused error
            system $tty_safe_restore;
        }
    }

    print("\n");
    $return;
}
1;
=head1 NAME
Term::ReadLine - Perl interface to various C<readline> packages.
If no real package is found, substitutes stubs instead of basic functions.
=head1 SYNOPSIS
use Term::ReadLine;
my $term = Term::ReadLine->new('Simple Perl calc');
my $prompt = "Enter your arithmetic expression: ";
my $OUT = $term->OUT || \*STDOUT;
while ( defined ($_ = $term->readline($prompt)) ) {
my $res = eval($_);
warn $@ if $@;
print $OUT $res, "\n" unless $@;
$term->addhistory($_) if /\S/;
}
=head1 DESCRIPTION
This package is just a front end to some other packages. It's a stub to
set up a common interface to the various ReadLine implementations found on
CPAN (under the C<Term::ReadLine::*> namespace).
=head1 Minimal set of supported functions
All the supported functions should be called as methods, i.e., either as
$term = Term::ReadLine->new('name');
or as
$term->addhistory('row');
where $term is a return value of Term::ReadLine-E<gt>new().
=over 12
=item C<ReadLine>
returns the actual package that executes the commands. Among possible
values are C<Term::ReadLine::Gnu>, C<Term::ReadLine::Perl>,
C<Term::ReadLine::Stub>.
=item C<new>
returns the handle for subsequent calls to following
functions. Argument is the name of the application. Optionally can be
followed by two arguments for C<IN> and C<OUT> filehandles. These
arguments should be globs.
=item C<readline>
gets an input line, I<possibly> with actual C<readline>
support. Trailing newline is removed. Returns C<undef> on C<EOF>.
=item C<addhistory>
adds the line to the history of input, from where it can be used if
the actual C<readline> is present.
=item C<IN>, C<OUT>
return the filehandles for input and output or C<undef> if C<readline>
input and output cannot be used for Perl.
=item C<MinLine>
If argument is specified, it is an advice on minimal size of line to
be included into history. C<undef> means do not include anything into
history. Returns the old value.
=item C<findConsole>
returns an array with two strings that give most appropriate names for
files for input and output using conventions C<"E<lt>$in">, C<"E<gt>out">.
=item Attribs
returns a reference to a hash which describes internal configuration
of the package. Names of keys in this hash conform to standard
conventions with the leading C<rl_> stripped.
=item C<Features>
Returns a reference to a hash with keys being features present in
current implementation. Several optional features are used in the
minimal interface: C<appname> should be present if the first argument
to C<new> is recognized, and C<minline> should be present if
C<MinLine> method is not dummy. C<autohistory> should be present if
lines are put into history automatically (maybe subject to
C<MinLine>), and C<addhistory> if C<addhistory> method is not dummy.
If C<Features> method reports a feature C<attribs> as present, the
method C<Attribs> is not dummy.
=back
=head1 Additional supported functions
Actually C<Term::ReadLine> can use some other package, that will
support a richer set of commands.
All these commands are callable via method interface and have names
which conform to standard conventions with the leading C<rl_> stripped.
The stub package included with the perl distribution allows some
additional methods:
=over 12
=item C<tkRunning>
makes Tk event loop run when waiting for user input (i.e., during
C<readline> method).
=item C<ornaments>
makes the command line stand out by using termcap data. The argument
to C<ornaments> should be 0, 1, or a string of a form
C<"aa,bb,cc,dd">. Four components of this string should be names of
I<terminal capacities>, first two will be issued to make the prompt
standout, last two to make the input line standout.
=item C<newTTY>
takes two arguments which are input filehandle and output filehandle.
Switches to use these filehandles.
=back
One can check whether the currently loaded ReadLine package supports
these methods by checking for corresponding C<Features>.
=head1 EXPORTS
None
=head1 ENVIRONMENT
The environment variable C<PERL_RL> governs which ReadLine clone is
loaded. If the value is false, a dummy interface is used. If the value
is true, it should be tail of the name of the package to use, such as
C<Perl> or C<Gnu>.
As a special case, if the value of this variable is space-separated,
the tail might be used to disable the ornaments by setting the tail to
be C<o=0> or C<ornaments=0>. The head should be as described above, say C<Perl> or C<Gnu>.
If the variable is not set, or if the head of space-separated list is
empty, the best available package is loaded.
export "PERL_RL=Perl o=0" # Use Perl ReadLine without ornaments
export "PERL_RL= o=0" # Use best available ReadLine without ornaments
(Note that processing of C<PERL_RL> for ornaments is in the discretion of the
particular used C<Term::ReadLine::*> package).
=head1 CAVEATS
It seems that using Term::ReadLine from Emacs minibuffer doesn't work
quite right and one will get an error message like
Cannot open /dev/tty for read at ...
One possible workaround for this is to explicitly open /dev/tty like this
open (FH, "/dev/tty" )
or eval 'sub Term::ReadLine::findConsole { ("&STDIN", "&STDERR") }';
die $@ if $@;
close (FH);
or you can try using the 4-argument form of Term::ReadLine->new().
=cut
use strict;
package Term::ReadLine::Stub;
# The stub inherits the Tk event-loop helpers and the termcap ornament
# support from the two helper packages named here.
our @ISA = qw'Term::ReadLine::Tk Term::ReadLine::TermCap';
$DB::emacs = $DB::emacs; # To pacify -w (mentions the variable a second time)
# Alias this package's @rl_term_set to the one owned by
# Term::ReadLine::TermCap, so both names refer to the same array.
our @rl_term_set;
*rl_term_set = \@Term::ReadLine::TermCap::rl_term_set;
# Bit tested against ${^UNICODE} in readline(); presumably the "STDIN is
# UTF-8" flag -- confirm against perlrun's -C documentation.
sub PERL_UNICODE_STDIN () { 0x0001 }
# Name of the package that actually implements the readline interface.
sub ReadLine {'Term::ReadLine::Stub'}
# readline: print the (ornamented) prompt on the output handle and read
# one line through $self->get_line.
#
#   $line = $term->readline($prompt);
#
# Returns the line with its trailing newline removed, or undef when
# get_line returns undef (end of input).
sub readline {
    my $self = shift;
    my ($in,$out,$str) = @$self;
    my $prompt = shift;
    print $out $rl_term_set[0], $prompt, $rl_term_set[1], $rl_term_set[2];
    $self->register_Tk
        if not $Term::ReadLine::registered and $Term::ReadLine::toloop
        and defined &Tk::DoOneEvent;
    #$str = scalar <$in>;
    $str = $self->get_line;

    # Only post-process a real line: at end of input $str is undef, and
    # neither the substitution nor utf8::upgrade should touch it (that
    # would at least warn, and could coerce the EOF marker).
    if (defined $str) {
        $str =~ s/^\s*\Q$prompt\E// if ($^O eq 'MacOS');
        utf8::upgrade($str)
            if (${^UNICODE} & PERL_UNICODE_STDIN || defined ${^ENCODING}) &&
               utf8::valid($str);
    }
    print $out $rl_term_set[3];
    # bug in 5.000: chomping empty string creates length -1:
    chomp $str if defined $str;
    $str;
}
# addhistory: no-op in the stub implementation; there is no history to
# add to.  Returns nothing.
sub addhistory {
    return;
}
# findConsole: choose names for the console input and output.
#
# Returns a two-element list ($console, $consoleOUT): either device/file
# names or the dup specifications "&STDIN", "&STDERR", "&STDOUT".
sub findConsole {
    my $console;
    my $consoleOUT;

    if ($^O eq 'MacOS') {
        $console = "Dev:Console";
    } elsif (-e "/dev/tty") {
        $console = "/dev/tty";
    } elsif (-e "con" or $^O eq 'MSWin32') {
        $console    = 'CONIN$';
        $consoleOUT = 'CONOUT$';
    } else {
        $console = "sys\$command";
    }

    # Platform overrides: on these systems the choice above is discarded
    # or replaced.
    if (($^O eq 'amigaos') || ($^O eq 'beos') || ($^O eq 'epoc')) {
        $console = undef;
    }
    elsif ($^O eq 'os2') {
        if ($DB::emacs) {
            $console = undef;
        } else {
            $console = "/dev/con";
        }
    }

    # Output defaults to the input device; with no device at all fall back
    # to duplicating the standard handles.
    $consoleOUT = $console unless defined $consoleOUT;
    $console = "&STDIN" unless defined $console;
    if (!defined $consoleOUT) {
        # The literal must stay on one line: a line break inside it would
        # put a newline into the handle specification.
        $consoleOUT = defined fileno(STDERR) && $^O ne 'MSWin32'
            ? "&STDERR"
            : "&STDOUT";
    }
    ($console,$consoleOUT);
}
# new: construct the terminal object, an array ref [input handle, output
# handle].
#
# Called with two arguments (invocant, application name) it opens the
# console chosen by findConsole; called with four arguments the third and
# fourth are used as the input and output handles.  Any other argument
# count dies.  The output handle is made unbuffered in both cases, and
# ornaments are switched on unless PERL_RL contains an "o...=0" setting.
sub new {
die "method new called with wrong number of arguments"
unless @_==2 or @_==4;
#local (*FIN, *FOUT);
my ($FIN, $FOUT, $ret);
if (@_==2) {
my($console, $consoleOUT) = $_[0]->findConsole;
# the Windows CONIN$ needs GENERIC_WRITE mode to allow
# a SetConsoleMode() if we end up using Term::ReadKey
# NOTE(review): both opens use the 2-argument form with bareword handles
# and their results are not checked.  findConsole can return "&STDIN" /
# "&STDERR", which rely on the 2-argument dup syntax.
open FIN, ( $^O eq 'MSWin32' && $console eq 'CONIN$' ) ? "+<$console" :
"<$console";
open FOUT,">$consoleOUT";
#OUT->autoflush(1); # Conflicts with debugger?
# Select the output handle just long enough to turn on autoflush.
my $sel = select(FOUT);
$| = 1; # for DB::OUT
select($sel);
# Single-argument bless: the object is blessed into the package this sub
# is compiled in, not into the invocant class.
$ret = bless [\*FIN, \*FOUT];
} else { # Filehandles supplied
$FIN = $_[2]; $FOUT = $_[3];
#OUT->autoflush(1); # Conflicts with debugger?
my $sel = select($FOUT);
$| = 1; # for DB::OUT
select($sel);
$ret = bless [$FIN, $FOUT];
}
# Enable ornaments when the implementation advertises them and PERL_RL
# does not disable them; termcap lookup warnings are suppressed here.
if ($ret->Features->{ornaments}
and not ($ENV{PERL_RL} and $ENV{PERL_RL} =~ /\bo\w*=0/)) {
local $Term::ReadLine::termcap_nowarn = 1;
$ret->ornaments(1);
}
return $ret;
}
# newTTY: switch the object to a new pair of handles.
#
#   $term->newTTY($in, $out);
#
# Stores $in and $out in slots 0 and 1 and turns on autoflush for $out,
# leaving the previously selected handle selected afterwards.
sub newTTY {
    my ( $self, $in, $out ) = @_;

    @{$self}[ 0, 1 ] = ( $in, $out );

    my $previous = select($out);
    $| = 1;    # for DB::OUT
    return select($previous);
}
# Accessors of the stub terminal object (an array ref [in, out]).

# IN: the input handle stored in slot 0.
sub IN {
    my ($self) = @_;
    return $self->[0];
}

# OUT: the output handle stored in slot 1.
sub OUT {
    my ($self) = @_;
    return $self->[1];
}

# MinLine: the stub keeps no history; always yields undef (a one-element
# list holding undef in list context).
sub MinLine {
    return undef;
}

# Attribs: a fresh, empty hash reference on every call.
sub Attribs {
    return +{};
}

# Features supported by the stub implementation.
my %features = (
    tkRunning => 1,
    ornaments => 1,
    'newTTY'  => 1,
);

# Features: reference to the shared feature table above.
sub Features {
    return \%features;
}
# get_line: read one newline-terminated line from the object's input
# handle, regardless of the caller's current $/ setting.
# Returns the line (newline included) or undef at end of input.
sub get_line {
    my ($self) = @_;
    my $handle = $self->IN;
    local $/ = "\n";
    my $line = <$handle>;
    return $line;
}
package Term::ReadLine; # So late to allow the above code be defined?
our $VERSION = '1.04';
# Choose the implementation at load time.  $which is the first
# whitespace-separated word of $ENV{PERL_RL}, or undef when the variable
# is not set at all.
my ($which) = exists $ENV{PERL_RL} ? split /\s+/, $ENV{PERL_RL} : undef;
if ($which) {
# A true value names the implementation; "gnu" and "perl" are matched
# case-insensitively, anything else is used verbatim as the package tail.
# NOTE(review): $which comes straight from the environment and is
# interpolated into a string eval.
if ($which =~ /\bgnu\b/i){
eval "use Term::ReadLine::Gnu;";
} elsif ($which =~ /\bperl\b/i) {
eval "use Term::ReadLine::Perl;";
} else {
eval "use Term::ReadLine::$which;";
}
} elsif (defined $which and $which ne '') { # Defined but false
# Do nothing fancy
} else {
# Unset or empty: try Gnu first, then Perl; failures are ignored.
eval "use Term::ReadLine::Gnu; 1" or eval "use Term::ReadLine::Perl; 1";
}
#require FileHandle;
# To make possible switch off RL in debugger: (Not needed, work done
# in debugger).
# Inherit from whichever implementation actually defined a readline sub;
# the stub is the last resort.
our @ISA;
if (defined &Term::ReadLine::Gnu::readline) {
@ISA = qw(Term::ReadLine::Gnu Term::ReadLine::Stub);
} elsif (defined &Term::ReadLine::Perl::readline) {
@ISA = qw(Term::ReadLine::Perl Term::ReadLine::Stub);
} elsif (defined $which && defined &{"Term::ReadLine::$which\::readline"}) {
# Note: unlike the two branches above, the stub is not added here.
@ISA = "Term::ReadLine::$which";
} else {
@ISA = qw(Term::ReadLine::Stub);
}
package Term::ReadLine::TermCap;
# Four escape sequences, in this order: prompt-start, prompt-end,
# command-line-start, command-line-end.  They are emitted around the
# prompt and the command line by readline(); all empty means "no
# ornaments".
our @rl_term_set = ("","","","");
# The same setting in its string-encoded form: four comma-separated
# capability names (see ornaments()).
our $rl_term_set = ',,,';
# Terminal object created on demand by LoadTermCap().
our $terminal;
# LoadTermCap: create the shared $terminal object on first use.
# Does nothing when $terminal is already defined.  Term::Cap is loaded
# lazily; any failure it raises propagates to the caller.
sub LoadTermCap {
    return if defined $terminal;

    require Term::Cap;

    # Direct method call instead of indirect object syntax
    # ("Tgetent Term::Cap (...)"), which is parsed heuristically and is
    # unavailable when the 'indirect' feature is disabled.
    $terminal = Term::Cap->Tgetent( { OSPEED => 9600 } );    # Avoid warning.
}
# ornaments: query or set the sequences emitted around prompt and input.
#
#   $current = $term->ornaments;           # query
#   $term->ornaments(0 | 1 | "aa,bb,cc,dd");
#
# Without an argument returns the current string-encoded setting.  With an
# argument: a false value clears the ornaments, '1' selects the default
# 'us,ue,md,me', anything else is taken as up to four comma-separated
# capability names.  Returns the new setting, or nothing when no terminal
# description could be loaded (the setting is then reset to ',,,').
sub ornaments {
    my ( undef, @args ) = @_;
    return $rl_term_set unless @args;

    $rl_term_set = $args[0] || ',,,';
    $rl_term_set = 'us,ue,md,me' if $rl_term_set eq '1';
    my @cap_names = split /,/, $rl_term_set, 4;

    eval { LoadTermCap() };
    if ( !defined $terminal ) {
        warn("Cannot find termcap: $@\n") unless $Term::ReadLine::termcap_nowarn;
        $rl_term_set = ',,,';
        return;
    }

    # Translate each capability name into its escape sequence; unnamed or
    # unsupported capabilities become the empty string.
    my @sequences;
    for my $name (@cap_names) {
        my $sequence = '';
        if ($name) {
            $sequence = $terminal->Tputs( $name, 1 ) || '';
        }
        push @sequences, $sequence;
    }
    @rl_term_set = @sequences;

    return $rl_term_set;
}
package Term::ReadLine::Tk;
# Counters recording how often the Tk helpers ran.
our ( $count_handle, $count_DoOne, $count_loop ) = ( 0, 0, 0 );

# Set by handle() to make Tk_loop() stop waiting.
our $giveup;

# handle: fileevent callback; flags that input is ready and counts the
# call.  Returns the counter's value before the increment.
sub handle {
    $giveup = 1;
    return $count_handle++;
}
# Tk_loop: run Tk events one at a time until $giveup becomes true (set by
# handle()), then clear the flag again.  Both counters are updated.
sub Tk_loop {
    # Tk->tkwait('variable',\$giveup);	# needs Widget
    while ( !$giveup ) {
        $count_DoOne++;
        Tk::DoOneEvent(0);
    }
    $count_loop++;
    $giveup = 0;
}
# register_Tk: install handle() as the Tk 'readable' callback for the
# input handle, the first time only.  Every call bumps
# $Term::ReadLine::registered; later calls return its previous value
# without touching Tk.
sub register_Tk {
    my ($self) = @_;

    my $seen_before = $Term::ReadLine::registered++;
    return $seen_before if $seen_before;

    return Tk->fileevent( $self->IN, 'readable', \&handle );
}
# tkRunning: get or set the flag that makes input waits run the Tk event
# loop.
#
#   $term->tkRunning(1);        # set
#   $flag = $term->tkRunning;   # query
#
# Returns the current value of $Term::ReadLine::toloop.
sub tkRunning {
    my ( undef, @new_value ) = @_;
    $Term::ReadLine::toloop = $new_value[0] if @new_value;
    return $Term::ReadLine::toloop;
}
# get_c: read a single character from the input handle, first running the
# Tk event loop when that is enabled and Tk is loaded.
# Returns the character, or undef at end of input.
sub get_c {
    my ($self) = @_;

    if ( $Term::ReadLine::toloop && defined &Tk::DoOneEvent ) {
        $self->Tk_loop;
    }
    return getc( $self->IN );
}
# get_line: read one newline-terminated line from the input handle, first
# running the Tk event loop when that is enabled and Tk is loaded.
# Returns the line (newline included) or undef at end of input.
sub get_line {
    my ($self) = @_;

    if ( $Term::ReadLine::toloop && defined &Tk::DoOneEvent ) {
        $self->Tk_loop;
    }

    my $handle = $self->IN;
    local $/ = "\n";
    my $line = <$handle>;
    return $line;
}
1;
package Term::UI;
use Carp;
use Params::Check qw[check allow];
use Term::ReadLine;
use Locale::Maketext::Simple Style => 'gettext';
use Term::UI::History;
use strict;
# Package globals are declared and initialised at compile time so that
# they are already set when the rest of the file is compiled.
BEGIN {
use vars qw[$VERSION $AUTOREPLY $VERBOSE $INVALID];
# Warn about problems by default.
$VERBOSE = 1;
$VERSION = '0.20';
# Prompt shown when an answer is rejected; passed through loc().
$INVALID = loc('Invalid selection, please try again: ');
}
# Append this package to Term::ReadLine::Stub's @ISA (once only), so that
# objects inheriting from the stub can call the methods defined here.
push @Term::ReadLine::Stub::ISA, __PACKAGE__
unless grep { $_ eq __PACKAGE__ } @Term::ReadLine::Stub::ISA;
=pod
=head1 NAME
Term::UI - Term::ReadLine UI made easy
=head1 SYNOPSIS
use Term::UI;
use Term::ReadLine;
my $term = Term::ReadLine->new('brand');
my $reply = $term->get_reply(
prompt => 'What is your favourite colour?',
choices => [qw|blue red green|],
default => 'blue',
);
my $bool = $term->ask_yn(
prompt => 'Do you like cookies?',
default => 'y',
);
my $string = q[some_command -option --no-foo --quux='this thing'];
my ($options,$munged_input) = $term->parse_options($string);
### don't have Term::UI issue warnings -- default is '1'
$Term::UI::VERBOSE = 0;
### always pick the default (good for non-interactive terms)
### -- default is '0'
$Term::UI::AUTOREPLY = 1;

### Retrieve the entire session as a printable string:
$hist = Term::UI::History->history_as_string;
$hist = $term->history_as_string;
=head1 DESCRIPTION
C<Term::UI> is a transparent way of eliminating the overhead of having
to format a question and then validate the reply, informing the user
if the answer was not proper and re-issuing the question.
Simply give it the question you want to ask, optionally with choices
the user can pick from and a default and C<Term::UI> will DWYM.
For asking a yes or no question, there's even a shortcut.
=head1 HOW IT WORKS
C<Term::UI> places itself at the back of the C<Term::ReadLine>
C<@ISA> array, so you can call its functions through your term object.
C<Term::UI> uses C<Term::UI::History> to record all interactions
with the commandline. You can retrieve this history, or alter
the filehandle the interaction is printed to. See the
C<Term::UI::History> manpage or the C<SYNOPSIS> for details.
=head1 METHODS
=head2 $reply = $term->get_reply( prompt => 'question?', [choices => \@list, default => $list[0], multi => BOOL, print_me => "extra text to print & record", allow => $ref] );
C<get_reply> asks a user a question, and then returns the reply to the
caller. If the answer is invalid (more on that below), the question will
be reposed, until a satisfactory answer has been entered.
You have the option of providing a list of choices the user can pick from
using the C<choices> argument. If the answer is not in the list of choices
presented, the question will be reposed.
If you provide a C<default> answer, this will be returned when either
C<$AUTOREPLY> is set to true, (see the C<GLOBAL VARIABLES> section further
below), or when the user just hits C<enter>.
You can indicate that the user is allowed to enter multiple answers by
toggling the C<multi> flag. Note that a list of answers will then be
returned to you, rather than a simple string.
By specifying an C<allow> handler, you can yourself validate the answer
a user gives. This can be any of the types that the Params::Check C<allow>
function allows, so please refer to that manpage for details.
Finally, you have the option of adding a C<print_me> argument, which is
simply printed before the prompt. It's printed to the same file handle
as the rest of the questions, so you can use this to keep track of a
full session of Q&A with the user, and retrieve it later using the
C<< Term::UI->history_as_string >> function.
See the C<EXAMPLES> section for samples of how to use this function.
=cut
# get_reply: validate the arguments, build the numbered list of choices
# and the default hint, then hand over to _tt_readline.
#
# Returns whatever _tt_readline returns, or nothing (after a carp) when
# the arguments do not pass the template.
sub get_reply {
    my ( $term, %hash ) = @_;

    my $tmpl = {
        default  => { default => undef, strict_type => 1 },
        prompt   => { default => '',    strict_type => 1, required => 1 },
        choices  => { default => [],    strict_type => 1 },
        multi    => { default => 0,     allow       => [0, 1] },
        allow    => { default => qr/.*/ },
        print_me => { default => '',    strict_type => 1 },
    };

    my $args = check( $tmpl, \%hash, $VERBOSE );
    unless ($args) {
        carp( loc(q[Could not parse arguments]) );
        return;
    }

    ### text appended to the prompt to show the default answer, if any
    my $prompt_add;
    my $default = $args->{default};
    my $choices = $args->{choices};

    if (@$choices) {

        ### several choices were supplied: list them separately before
        ### the prompt, one "DIGIT> choice" line each
        for my $index ( 0 .. $#{$choices} ) {
            my $choice = $choices->[$index];
            my $number = $index + 1;    # humans start counting at 1

            ### this choice is the default: remember its number so a
            ### "foo? [DIGIT]" type prompt can be constructed
            $prompt_add = $number
                if defined $default and $choice eq $default;

            $args->{print_me} .= sprintf "\n%3s> %-s", $number, $choice;
        }

        ### choices were listed -- add another newline for pretty printing
        $args->{print_me} .= "\n";

        ### allowable answers are now equal to the choices listed
        $args->{allow} = $choices;
    }
    elsif ( defined $default ) {

        ### no choices, but a default: show it as 'foo? [DEFAULT]'
        $prompt_add = $default;
    }

    ### defaults and prompts are set up; dispatch to the readline call
    return $term->_tt_readline( %$args, prompt_add => $prompt_add );
}
=head2 $bool = $term->ask_yn( prompt => "your question", [default => (y|1,n|0),
print_me => "extra text to print & record"] )
Asks a simple C<yes> or C<no> question to the user, returning a boolean
indicating C<true> or C<false> to the caller.
The C<default> answer will automatically be returned if the user hits
C<enter> or if C<$AUTOREPLY> is set to true. See the C<GLOBAL VARIABLES>
section further below.
Also, you have the option of adding a C<print_me> argument, which is
simply printed before the prompt. It's printed to the same file handle
as the rest of the questions, so you can use this to keep track of a
full session of Q&A with the user, and retrieve it later using the
C<< Term::UI->history_as_string >> function.
See the C<EXAMPLES> section for samples of how to use this function.
=cut
# ask_yn: ask a yes/no question through _tt_readline.
#
# Returns 1 when the answer starts with "y" (case-insensitive), 0
# otherwise, and undef when the arguments do not pass the template.
sub ask_yn {
    my ( $term, %hash ) = @_;

    my $tmpl = {
        default  => { default => undef, allow => [qw|0 1 y n|],
                      strict_type => 1 },
        prompt   => { default => '', required => 1, strict_type => 1 },
        print_me => { default => '', strict_type => 1 },

        ### fixed settings, not overridable by the caller
        multi    => { default => 0, no_override => 1 },
        choices  => { default => [qw|y n|], no_override => 1 },
        allow    => { default => [qr/^y(?:es)?$/i, qr/^n(?:o)?$/i],
                      no_override => 1
                    },
    };

    my $args = check( $tmpl, \%hash, $VERBOSE ) or return undef;

    ### build the 'y/n' hint for the prompt, with the default choice
    ### uppercased ('foo? [Y/n]' style) if there is one
    my @hints = @{ $args->{choices} };

    if ( defined $args->{default} ) {

        ### a default supplied as a boolean is transformed to y/n first
        if ( $args->{default} =~ /\d/ ) {
            my %letter_for = ( 0 => 'n', 1 => 'y' );
            $args->{default} = $letter_for{ $args->{default} };
        }

        for my $hint (@hints) {
            $hint = uc $args->{default}
                if lc $args->{default} eq lc $hint;
        }
    }

    my $prompt_add = join( "/", @hints );

    my $rv = $term->_tt_readline( %$args, prompt_add => $prompt_add );

    return 1 if $rv =~ /^y/i;
    return 0;
}
# _tt_readline: shared question loop behind get_reply and ask_yn.
#
# Takes the same named arguments as get_reply plus 'prompt_add' (the hint
# shown in brackets after the prompt).  Prints 'print_me' via history(),
# then either returns the default at once (when $AUTOREPLY is true) or
# keeps asking through $term->readline until every answer is accepted.
# Returns a list of answers when 'multi' is true, a single answer
# otherwise; returns nothing when the arguments fail the template check.
sub _tt_readline {
my $term = shift;
my %hash = @_;
local $Params::Check::VERBOSE = 0; # why is this?
local $| = 1; # print ASAP
my ($default, $prompt, $choices, $multi, $allow, $prompt_add, $print_me);
# Each validated argument is stored straight into the matching lexical
# above through the 'store' references.
my $tmpl = {
default => { default => undef, strict_type => 1,
store => \$default },
prompt => { default => '', strict_type => 1, required => 1,
store => \$prompt },
choices => { default => [], strict_type => 1,
store => \$choices },
multi => { default => 0, allow => [0, 1], store => \$multi },
allow => { default => qr/.*/, store => \$allow, },
prompt_add => { default => '', store => \$prompt_add },
print_me => { default => '', store => \$print_me },
};
check( $tmpl, \%hash, $VERBOSE ) or return;
### prompts for Term::ReadLine can't be longer than one line, or
### it can display wonky on some terminals.
history( $print_me ) if $print_me;

### we might have to add a default value to the prompt, to
### show the user what will be picked by default:
$prompt .= " [$prompt_add]: " if $prompt_add;
### are we in autoreply mode?
if ($AUTOREPLY) {

### you used autoreply, but didn't provide a default!
carp loc(
q[You have '%1' set to true, but did not provide a default!],
'$AUTOREPLY'
) if( !defined $default && $VERBOSE);
### print it out for visual feedback
history( join ' ', grep { defined } $prompt, $default );

### and return the default
return $default;
}
### so, no AUTOREPLY, let's see what the user will answer
# NOTE(review): when readline returns undef (end of input), there is no
# default and choices were given, the undef answer is never accepted and
# the block is redone, so the question is re-asked without end.
LOOP: {

### annoying bug in T::R::Perl that mucks up lines with a \n
### in them; So split by \n, save the last line as the prompt
### and just print the rest
{ my @lines = split "\n", $prompt;
$prompt = pop @lines;

history( "$_\n" ) for @lines;
}

### pose the question
my $answer = $term->readline($prompt);
# An empty reply selects the default.
$answer = $default unless length $answer;
$term->addhistory( $answer ) if length $answer;
### add both prompt and answer to the history
history( "$prompt $answer", 0 );
### if we're allowed to give multiple answers, split
### the answer on whitespace
my @answers = $multi ? split(/\s+/, $answer) : $answer;
### the return value list
my @rv;

if( @$choices ) {

for my $answer (@answers) {

### a digit implies a multiple choice question,
### a non-digit is an open answer
if( $answer =~ /\D/ ) {
push @rv, $answer if allow( $answer, $allow );
} else {
### remember, the answer digits are +1 compared to
### the choices, because humans want to start counting
### at 1, not at 0
push @rv, $choices->[ $answer - 1 ]
if $answer > 0 && defined $choices->[ $answer - 1];
}
}

### no fixed list of choices.. just check if the answers
### (or otherwise the default!) pass the allow handler
} else {
push @rv, grep { allow( $_, $allow ) }
scalar @answers ? @answers : ($default);
}
### if not all the answers made it to the return value list,
### at least one of them was an invalid answer -- make the
### user do it again
if( (@rv != @answers) or
(scalar(@$choices) and not scalar(@answers))
) {
# Re-ask with the "invalid selection" text as the new prompt.
$prompt = $INVALID;
$prompt .= "[$prompt_add] " if $prompt_add;
redo LOOP;
### otherwise just return the answer, or answers, depending
### on the multi setting
} else {
return $multi ? @rv : $rv[0];
}
}
}
=head2 ($opts, $munged) = $term->parse_options( STRING );
C<parse_options> will convert all options given from an input string
to a hash reference. If called in list context it will also return
the part of the input string that it found no options in.
Consider this example:
my $str = q[command --no-foo --baz --bar=0 --quux=bleh ] .
q[--option="some'thing" -one-dash -single=blah' arg];
my ($options,$munged) = $term->parse_options($str);
### $options would contain: ###
$options = {
'foo' => 0,
'bar' => 0,
'one-dash' => 1,
'baz' => 1,
'quux' => 'bleh',
'single' => 'blah\'',
'option' => 'some\'thing'
};
### and this is the munged version of the input string,
### ie what's left of the input minus the options
$munged = 'command arg';
As you can see, you can either use a single or a double C<-> to
indicate an option.
If you prefix an option with C<no-> and do not give it a value, it
will be set to 0.
If it has no prefix and no value, it will be set to 1.
Otherwise, it will be set to its value. Note also that it can deal
fine with single/double quoting issues.
=cut
# parse_options: strip "-name", "--name", "--no-name" and "--name=value"
# style options out of a string.
#
#   ($opts, $munged) = $term->parse_options($string);
#
# Returns a hash ref of the options found; in list context also returns
# what is left of the input once the options have been removed.
sub parse_options {
    my ( $term, $input ) = @_;
    my $return = {};

    ### tried in this order on every pass: quoted value, bare value,
    ### then a flag without a value
    my @option_patterns = (
        qr/(?:^|\s+)--?([-\w]+=("|').+?\2)(?=\Z|\s+)/,
        qr/(?:^|\s+)--?([-\w]+=\S+)(?=\Z|\s+)/,
        qr/(?:^|\s+)--?([-\w]+)(?=\Z|\s+)/,
    );

    OPTION: while (1) {

        ### remove the first option the patterns can find, if any
        my $match;
        for my $pattern (@option_patterns) {
            next unless $input =~ s/$pattern//;
            $match = $1;
            last;
        }
        last OPTION unless defined $match;

        ### name="value" or name='value': store the unquoted value
        if ( my ( $name, undef, $value ) =
                $match =~ /^([-\w]+)=("|')(.+?)\2$/ ) {
            $return->{$name} = $value;
        }
        ### name=value
        elsif ( my ( $key, $plain ) = $match =~ /^([-\w]+)=(\S+)$/ ) {
            $return->{$key} = $plain;
        }
        ### no-name (dash optional, any case): switched off
        elsif ( my ($negated) = $match =~ /^no-?([-\w]+)$/i ) {
            $return->{$negated} = 0;
        }
        ### plain flag: switched on
        elsif ( my ($flag) = $match =~ /^([-\w]+)$/ ) {
            $return->{$flag} = 1;
        }
        else {
            carp(loc(q[I do not understand option "%1"\n], $match)) if $VERBOSE;
        }
    }

    return wantarray ? ($return,$input) : $return;
}
=head2 $str = $term->history_as_string
Convenience wrapper around C<< Term::UI::History->history_as_string >>.
Consult the C<Term::UI::History> man page for details.
=cut
# history_as_string: convenience wrapper; forwards to the class method of
# the same name on Term::UI::History and returns its result.
sub history_as_string {
    my $history_class = 'Term::UI::History';
    return $history_class->history_as_string;
}
1;
=head1 GLOBAL VARIABLES
The behaviour of Term::UI can be altered by changing the following
global variables:
=head2 $Term::UI::VERBOSE
This controls whether Term::UI will issue warnings and explanations
as to why certain things may have failed. If you set it to 0,
Term::UI will not output any warnings.
The default is 1;
=head2 $Term::UI::AUTOREPLY
This will make every question be answered by the default, and warn if
there was no default provided. This is particularly useful if your
program is run in non-interactive mode.
The default is 0;
=head2 $Term::UI::INVALID
This holds the string that will be printed when the user makes an
invalid choice.
You can override this string from your program if you, for example,
wish to do localization.
The default is C<Invalid selection, please try again: >
=head2 $Term::UI::History::HISTORY_FH
This is the filehandle all the print statements from this module
are being sent to. Please consult the C<Term::UI::History> manpage
for details.
This defaults to C<*STDOUT>.
=head1 EXAMPLES
=head2 Basic get_reply sample
### ask a user (with an open question) for their favourite colour
$reply = $term->get_reply( prompt => 'Your favourite colour?' );

which would look like:
Your favourite colour?
and C<$reply> would hold the text the user typed.
=head2 get_reply with choices
### now provide a list of choices, so the user has to pick one
$reply = $term->get_reply(
prompt => 'Your favourite colour?',
choices => [qw|red green blue|] );

which would look like:
1> red
2> green
3> blue

Your favourite colour?

C<$reply> will hold one of the choices presented. C<Term::UI> will repose
the question if the user attempts to enter an answer that's not in the
list of choices. The string presented is held in the C<$Term::UI::INVALID>
variable (see the C<GLOBAL VARIABLES> section for details).
=head2 get_reply with choices and default
### provide a sensible default option -- everyone loves blue!
$reply = $term->get_reply(
prompt => 'Your favourite colour?',
choices => [qw|red green blue|],
default => 'blue' );
which would look like:
1> red
2> green
3> blue

Your favourite colour? [3]:
Note the default answer after the prompt. A user can now just hit C<enter>
(or set C<$Term::UI::AUTOREPLY> -- see the C<GLOBAL VARIABLES> section) and
the sensible answer 'blue' will be returned.
=head2 get_reply using print_me & multi
### allow the user to pick more than one colour and add an
### introduction text
@reply = $term->get_reply(
print_me => 'Tell us what colours you like',
prompt => 'Your favourite colours?',
choices => [qw|red green blue|],
multi => 1 );
which would look like:
Tell us what colours you like
1> red
2> green
3> blue

Your favourite colours?
An answer of C<3 2 1> would fill C<@reply> with C<blue green red>
=head2 get_reply & allow
### pose an open question, but do a custom verification on
### the answer, which will only exit the question loop, if
### the answer matches the allow handler.
$reply = $term->get_reply(
prompt => "What is the magic number?",
allow => 42 );

Unless the user now enters C<42>, the question will be reposed over
and over again. You can use more sophisticated C<allow> handlers (even
subroutines can be used). The C<allow> handler is implemented using
C<Params::Check>'s C<allow> function. Check its manpage for details.
=head2 an elaborate ask_yn sample
### ask a user if he likes cookies. Default to a sensible 'yes'
### and inform him first what cookies are.
$bool = $term->ask_yn( prompt => 'Do you like cookies?',
default => 'y',
print_me => 'Cookies are LOVELY!!!' );
would print:
Cookies are LOVELY!!!
Do you like cookies? [Y/n]:
If a user then simply hits C<enter>, agreeing with the default,
C<$bool> would be set to C<true>. (Simply hitting 'y' would also
return C<true>. Hitting 'n' would return C<false>)
We could later retrieve this interaction by printing out the Q&A
history as follows:
print $term->history_as_string;
which would then print:
Cookies are LOVELY!!!
Do you like cookies? [Y/n]: y
There's a chance we're doing this non-interactively, because a console
is missing, the user indicated he just wanted the defaults, etc.
In this case, simply setting C<$Term::UI::AUTOREPLY> to true, will
return from every question with the default answer set for the question.
Do note that if C<AUTOREPLY> is true, and no default is set, C<Term::UI>
will warn about this and return C<undef>.
=head1 See Also
C<Params::Check>, C<Term::ReadLine>, C<Term::UI::History>
=head1 BUG REPORTS
Please report bugs or other issues to E<lt>bug-term-ui@rt.cpan.orgE<gt>.
=head1 AUTHOR
This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
=head1 COPYRIGHT
This library is free software; you may redistribute and/or modify it
under the same terms as Perl itself.
=cut
package Term::UI::History;
use strict;
use base 'Exporter';
use base 'Log::Message::Simple';
=pod
=head1 NAME
Term::UI::History
=head1 SYNOPSIS
use Term::UI::History qw[history];
history("Some message");
### retrieve the history in printable form
$hist = Term::UI::History->history_as_string;
### redirect output
local $Term::UI::History::HISTORY_FH = \*STDERR;
=head1 DESCRIPTION
This module provides the C<history> function for C<Term::UI>,
printing and saving all the C<UI> interaction.
Refer to the C<Term::UI> manpage for details on usage from
C<Term::UI>.
This module subclasses C<Log::Message::Simple>. Refer to its
manpage for additional functionality available via this package.
=head1 FUNCTIONS
=head2 history("message string" [,VERBOSE])
Records a message on the stack, and prints it to C<STDOUT>
(or actually C<$HISTORY_FH>, see the C<GLOBAL VARIABLES> section
below), if the C<VERBOSE> option is true.
The C<VERBOSE> option defaults to true.
=cut
# Compile-time setup: declare the package globals, create the message log,
# and install one logging function per name in @EXPORT.
BEGIN {
    use Log::Message private => 0;
    use vars qw[ @EXPORT $HISTORY_FH ];

    @EXPORT     = qw[ history ];
    $HISTORY_FH = \*STDOUT;

    my $log = Log::Message->new;

    for my $func (@EXPORT) {

        # Each generated function stores its first argument as the message,
        # tagged with the upper-cased function name; any remaining arguments
        # are passed along as "extra".
        my $recorder = sub {
            my ( $msg, @extra ) = @_;
            return $log->store(
                message => $msg,
                tag     => uc $func,
                level   => $func,
                extra   => [@extra],
            );
        };

        no strict 'refs';
        *{$func} = $recorder;
    }

    # Return every stacked message, joined with the input record separator.
    sub history_as_string {
        my $class = shift;
        my @messages = map { $_->message } __PACKAGE__->stack;
        return join $/, @messages;
    }
}
{
    package Log::Message::Handlers;

    # Handler for the "history" level: print the message text followed by a
    # newline to $Term::UI::History::HISTORY_FH.
    #   $self    - object providing a message() method
    #   $verbose - optional flag; undefined means true, a value numerically
    #              equal to 0 suppresses the output
    # Always returns nothing.
    sub history {
        my ( $self, $verbose ) = @_;

        # No flag given: default to printing.
        $verbose = 1 if !defined $verbose;

        ### so you don't want us to print the msg? ###
        if ( defined $verbose && $verbose == 0 ) {
            return;
        }

        local $| = 1;

        # Temporarily make the history handle the default output handle.
        my $previous_fh = select $Term::UI::History::HISTORY_FH;
        print $self->message . "\n";
        select $previous_fh;

        return;
    }
}
=head1 GLOBAL VARIABLES
=over 4
=item $HISTORY_FH
This is the filehandle all the messages sent to C<history()> are being
printed to. This defaults to C<*STDOUT>.
=back
=head1 See Also
C<Log::Message::Simple>, C<Term::UI>
=head1 AUTHOR
This module by
Jos Boumans E<lt>kane@cpan.orgE<gt>.
=head1 COPYRIGHT
This module is
copyright (c) 2005 Jos Boumans E<lt>kane@cpan.orgE<gt>.
All rights reserved.
This library is free software;
you may redistribute and/or modify it under the same
terms as Perl itself.
=cut
1;
# Local variables:
# c-indentation-style: bsd
# c-basic-offset: 4
# indent-tabs-mode: nil
# End:
# vim: expandtab shiftwidth=4:
package Test::Builder;
use 5.006;
use strict;
use warnings;
# Module version, written as a string.
our $VERSION = '0.92';

# Replace the version string with the result of evaluating it as Perl code.
# The perlcritic annotation must stay on the same line as the statement: if
# it wraps, its closing parenthesis lands outside the comment and breaks
# compilation.
$VERSION = eval $VERSION;    ## no critic (BuiltinFunctions::ProhibitStringyEval)
BEGIN {
    # Test::Builder::IO::Scalar is only loaded on perls older than 5.8.
    require Test::Builder::IO::Scalar if $] < 5.008;
}
# Make Test::Builder thread-safe for ithreads.
# At compile time this installs a share() function into the current package
# and, when threads are not in use, a do-nothing lock() as well.
BEGIN {
use Config;
# Load threads::shared when threads are turned on.
# 5.8.0's threads are so busted we no longer support them.
# The threaded variant is used only if perl is 5.8.1 or newer, was built
# with ithreads, and threads.pm has already been loaded.
if( $] >= 5.008001 && $Config{useithreads} && $INC{'threads.pm'} ) {
require threads::shared;
# Hack around YET ANOTHER threads::shared bug. It would
# occasionally forget the contents of the variable when sharing it.
# So we first copy the data, then share, then put our copy back.
*share = sub (\[$@%]) {
# $_[0] is expected to be a reference to a hash, array or scalar.
my $type = ref $_[0];
my $data;
# Step 1: take a private copy of the referenced contents.
if( $type eq 'HASH' ) {
%$data = %{ $_[0] };
}
elsif( $type eq 'ARRAY' ) {
@$data = @{ $_[0] };
}
elsif( $type eq 'SCALAR' ) {
$$data = ${ $_[0] };
}
else {
die( "Unknown type: " . $type );
}
# Step 2: share the variable. The & call form bypasses
# threads::shared::share's prototype.
$_[0] = &threads::shared::share( $_[0] );
# Step 3: copy the saved contents back into the shared variable.
if( $type eq 'HASH' ) {
%{ $_[0] } = %$data;
}
elsif( $type eq 'ARRAY' ) {
@{ $_[0] } = @$data;
}
elsif( $type eq 'SCALAR' ) {
${ $_[0] } = $$data;
}
else {
die( "Unknown type: " . $type );
}
return $_[0];
};
}
# 5.8.0's threads::shared is busted when threads are off
# and earlier Perls just don't have that module at all.
else {
# Without threads, share() hands back its first argument unchanged and
# lock() simply returns 0.
*share = sub { return $_[0] };
*lock = sub { 0 };
}
}
=head1 NAME
Test::Builder - Backend for building test libraries
=head1 SYNOPSIS
package My::Test::Module;
use base 'Test::Builder::Module';
my $CLASS = __PACKAGE__;
sub ok {
my($test, $name) = @_;
my $tb = $CLASS->builder;
$tb->ok($test, $name);
}
=head1 DESCRIPTION
Test::Simple and Test::More have proven to be popular testing modules,
but they're not always flexible enough. Test::Builder provides a
building block upon which to write your own test libraries I<which can
work together>.
=head2 Construction
=over 4
=item B<new>
my $Test = Test::Builder->new;
Returns a Test::Builder object representing the current state of the
test.
Since you only run one test per program C<new> always returns the same
Test::Builder object. No matter how many times you call C<new()>, you're
getting the same object. This is called a singleton. This is done so that
multiple modules share such global information as the test counter and
where test output is going.
If you want a completely new Test::Builder object different from the
singleton, use C<create>.
=cut
# File-scoped holder for the one shared Test::Builder object.
my $Test = Test::Builder->new;

# Return the shared object, building it with create() on first use.
sub new {
    my ($class) = @_;
    $Test = $class->create unless $Test;
    return $Test;
}
=item B<create>
my $Test = Test::Builder->create;
Ok, so there can be more than one Test::Builder object and this is how
you get it. You might use this instead of C<new()> if you're testing
a Test::Builder based module, but otherwise you probably want C<new>.
B<NOTE>: the implementation is not complete. C<level>, for example, is
still shared amongst B<all> Test::Builder objects, even ones created using
this method. Also, the method name may change in the future.
=cut
# Build a fresh object blessed into the invoking class, initialise it with
# reset(), and return it.
sub create {
    my ($class) = @_;

    my $builder = bless {}, $class;
    $builder->reset;

    return $builder;
}
=item B<reset>
$Test->reset;
Reinitializes the Test::Builder singleton to its original state.
Mostly useful for tests run in persistent environments where the same
test might be run multiple times in the same process.
=cut
our $Level;

# Put the object back into its initial state: no plan, test counter at
# zero, an empty result list, default output options, no TODO state, and
# freshly duplicated standard handles. Returns nothing.
sub reset {    ## no critic (Subroutines::ProhibitBuiltinHomonyms)
    my ($self) = @_;

    # We leave this a global because it has to be localized and localizing
    # hash keys is just asking for pain. Also, it was documented.
    $Level = 1;

    $self->{Have_Plan}        = 0;
    $self->{No_Plan}          = 0;
    $self->{Have_Output_Plan} = 0;

    # done_testing() refuses to run twice as long as this is true, so a
    # reset object must forget any earlier call.
    $self->{Done_Testing} = 0;

    $self->{Original_Pid} = $$;

    share( $self->{Curr_Test} );
    $self->{Curr_Test}    = 0;
    $self->{Test_Results} = &share( [] );

    $self->{Exported_To}    = undef;
    $self->{Expected_Tests} = 0;

    $self->{Skip_All} = 0;

    $self->{Use_Nums} = 1;

    $self->{No_Header} = 0;
    $self->{No_Ending} = 0;

    $self->{Todo}               = undef;
    $self->{Todo_Stack}         = [];
    $self->{Start_Todo}         = 0;
    $self->{Opened_Testhandles} = 0;

    $self->_dup_stdhandles;

    return;
}
=back
=head2 Setting up tests
These methods are for setting up tests and declaring how many there
are. You usually only want to call one of these methods.
=over 4
=item B<plan>
$Test->plan('no_plan');
$Test->plan( skip_all => $reason );
$Test->plan( tests => $num_tests );
A convenient way to set up your tests. Call this and Test::Builder
will print the appropriate headers and take the appropriate actions.
If you call C<plan()>, don't call any of the other methods below.
=cut
# Maps each plan keyword to the method that implements it.
my %plan_cmds = (
    no_plan  => \&no_plan,
    skip_all => \&skip_all,
    tests    => \&_plan_tests,
);

# Dispatch a plan request ($cmd, optional $arg) to its handler.
# Returns nothing when $cmd is false, otherwise 1. Croaks when a plan is
# already in place or when $cmd is not a known keyword.
sub plan {
    my ( $self, $cmd, $arg ) = @_;

    return unless $cmd;

    local $Level = $Level + 1;

    if ( $self->{Have_Plan} ) {
        $self->croak("You tried to plan twice");
    }

    my $handler = $plan_cmds{$cmd};
    if ( !$handler ) {
        my @args = grep { defined } ( $cmd, $arg );
        $self->croak("plan() doesn't understand @args");
    }
    else {
        local $Level = $Level + 1;
        $self->$handler($arg);
    }

    return 1;
}
# Handler for plan( tests => $arg ): forwards a true $arg to
# expected_tests() and croaks on an undefined or false count.
sub _plan_tests {
    my ( $self, $arg ) = @_;

    if ( !$arg ) {
        my $complaint =
          defined $arg
          ? "You said to run 0 tests"
          : "Got an undefined number of tests";
        $self->croak($complaint);
        return;
    }

    local $Level = $Level + 1;
    return $self->expected_tests($arg);
}
=item B<expected_tests>
my $max = $Test->expected_tests;
$Test->expected_tests($max);
Gets/sets the number of tests we expect this test to run and prints out
the appropriate headers.
=cut
# Get or set the number of tests this run is expected to execute.
#   $self->expected_tests        - return the stored count
#   $self->expected_tests($max)  - validate $max, store it, mark a plan as
#                                  present, and output the plan line unless
#                                  headers are turned off
# Croaks (through $self->croak) when $max is not a run of digits with an
# optional leading "+". Always returns the stored count.
sub expected_tests {
    my $self = shift;
    my ($max) = @_;

    if (@_) {

        # Keep the message literal on one line; a line break inside the
        # quotes would become part of the error text.
        $self->croak(
            "Number of tests must be a positive integer. You gave it '$max'")
          unless $max =~ /^\+?\d+$/;

        $self->{Expected_Tests} = $max;
        $self->{Have_Plan}      = 1;

        $self->_output_plan($max) unless $self->no_header;
    }

    return $self->{Expected_Tests};
}
=item B<no_plan>
$Test->no_plan;
Declares that this test will run an indeterminate number of tests.
=cut
# Record that the run has a plan of unspecified size. Warns (via
# $self->carp) if a true argument is passed. Returns 1.
sub no_plan {
    my ( $self, $arg ) = @_;

    if ($arg) {
        $self->carp("no_plan takes no arguments");
    }

    @{$self}{qw(No_Plan Have_Plan)} = ( 1, 1 );

    return 1;
}
=begin private
=item B<_output_plan>
$tb->_output_plan($max);
$tb->_output_plan($max, $directive);
$tb->_output_plan($max, $directive => $reason);
Handles displaying the test plan.
If a C<$directive> and/or C<$reason> are given they will be output with the
plan. So here's what skipping all tests looks like:
$tb->_output_plan(0, "SKIP", "Because I said so");
It sets C<< $tb->{Have_Output_Plan} >> and will warn (via C<carp>) if the plan was
already output.
=end private
=cut
# Print the plan line "1..$max", optionally followed by " # $directive" and
# " $reason", then remember that the plan has been output. Warns through
# $self->carp if a plan line was already printed. Returns nothing.
sub _output_plan {
    my ( $self, $max, $directive, $reason ) = @_;

    if ( $self->{Have_Output_Plan} ) {
        $self->carp("The plan was already output");
    }

    my @pieces = ("1..$max");
    push @pieces, " # $directive" if defined $directive;
    push @pieces, " $reason"      if defined $reason;

    $self->_print( join( '', @pieces ) . "\n" );

    $self->{Have_Output_Plan} = 1;

    return;
}
=item B<done_testing>
$Test->done_testing();
$Test->done_testing($num_tests);
Declares that you are done testing, no more tests will be run after this point.
If a plan has not yet been output, it will do so.
$num_tests is the number of tests you planned to run. If a numbered
plan was already declared, and if this contradicts, a failing test
will be run to reflect the planning mistake. If C<no_plan> was declared,
this will override.
If C<done_testing()> is called twice, the second call will issue a
failing test.
If C<$num_tests> is omitted, the number of tests run will be used, like
no_plan.
C<done_testing()> is, in effect, used when you'd want to use C<no_plan>, but
safer. You'd use it like so:
$Test->ok($a == $b);
$Test->done_testing();
Or to plan a variable number of tests:
for my $test (@tests) {
$Test->ok($test);
}
$Test->done_testing(@tests);
=cut
# Declare the end of testing. With $num_tests omitted, the current test
# number is used. A repeated call, or a count that contradicts an existing
# numeric plan, is reported as a failing test. Returns 1, or nothing when
# it had already been called.
sub done_testing {
    my ( $self, $num_tests ) = @_;

    if ( !defined $num_tests ) {
        $num_tests = $self->current_test;
    }
    else {

        # If done_testing() specified the number of tests, shut off no_plan.
        $self->{No_Plan} = 0;
    }

    if ( my $earlier_call = $self->{Done_Testing} ) {
        my ( $file, $line ) = @{$earlier_call}[ 1, 2 ];
        $self->ok( 0, "done_testing() was already called at $file line $line" );
        return;
    }

    # Remember where we were called from, for the message above.
    $self->{Done_Testing} = [caller];

    my $contradicts_plan =
      $self->expected_tests && $num_tests != $self->expected_tests;
    if ( !$contradicts_plan ) {
        $self->{Expected_Tests} = $num_tests;
    }
    else {
        $self->ok( 0, "planned to run @{[ $self->expected_tests ]} ".
            "but done_testing() expects $num_tests" );
    }

    if ( !$self->{Have_Output_Plan} ) {
        $self->_output_plan($num_tests);
    }

    $self->{Have_Plan} = 1;

    return 1;
}
=item B<has_plan>
$plan = $Test->has_plan
Find out whether a plan has been defined. C<$plan> is either C<undef> (no plan
has been set), C<no_plan> (indeterminate # of tests) or an integer (the number
of expected tests).
=cut
# Report the current plan: the expected test count when it is true,
# the string 'no_plan' when No_Plan is set, otherwise undef.
sub has_plan {
    my ($self) = @_;

    my $expected = $self->{Expected_Tests};
    if ($expected) {
        return $expected;
    }

    if ( $self->{No_Plan} ) {
        return 'no_plan';
    }

    return undef;
}
=item B<skip_all>
$Test->skip_all;
$Test->skip_all($reason);
Skips all the tests, using the given C<$reason>. Exits immediately with 0.
=cut
# Mark the whole run as skipped, print a "1..0 # SKIP $reason" plan unless
# headers are turned off, and exit the process with status 0.
sub skip_all {
    my ( $self, $reason ) = @_;

    $self->{Skip_All} = 1;

    if ( !$self->no_header ) {
        $self->_output_plan( 0, "SKIP", $reason );
    }

    exit(0);
}
=item B<exported_to>
my $pack = $Test->exported_to;
$Test->exported_to($pack);
Tells Test::Builder what package you exported your functions to.
This method isn't terribly useful since modules which share the same
Test::Builder object might get exported to different packages and only
the last one will be honored.
=cut
# Get the recorded export package, first storing $pack when one is given.
sub exported_to {
    my ( $self, $pack ) = @_;

    $self->{Exported_To} = $pack if defined $pack;

    return $self->{Exported_To};
}
=back
=head2 Running tests
These actually run the tests, analogous to the functions in Test::More.
They all return true if the test passed, false if the test failed.
C<$name> is always optional.
=over 4
=item B<ok>
$Test->ok($test, $name);
Your basic test. Pass if C<$test> is true, fail if $test is false. Just
like Test::Simple's C<ok()>.
=cut
# Record and report the outcome of one test.
#   $test - truth value of the test; only its boolean value is kept
#   $name - optional test name
# Increments the test counter, stores a result record in Test_Results,
# prints the "ok"/"not ok" line, and on failure emits diagnostics naming
# the caller's file and line. Returns 1 for a pass, 0 for a failure.
sub ok {
my( $self, $test, $name ) = @_;
# $test might contain an object which we don't want to accidentally
# store, so we turn it into a boolean.
$test = $test ? 1 : 0;
lock $self->{Curr_Test};
$self->{Curr_Test}++;
# In case $name is a string overloaded object, force it to stringify.
$self->_unoverload_str( \$name );
# Warn about names made up only of digits and whitespace.
$self->diag(<<"ERR") if defined $name and $name =~ /^[\d\s]+$/;
You named your test '$name'. You shouldn't use numbers for your test names.
Very confusing.
ERR
# Capture the value of $TODO for the rest of this ok() call
# so it can more easily be found by other routines.
my $todo = $self->todo();
my $in_todo = $self->in_todo;
local $self->{Todo} = $todo if $in_todo;
$self->_unoverload_str( \$todo );
my $out;
my $result = &share( {} );
# 'ok' is the reported status (a failure inside a TODO still counts as ok);
# 'actual_ok' is the real outcome.
unless($test) {
$out .= "not ";
@$result{ 'ok', 'actual_ok' } = ( ( $self->in_todo ? 1 : 0 ), 0 );
}
else {
@$result{ 'ok', 'actual_ok' } = ( 1, $test );
}
$out .= "ok";
$out .= " $self->{Curr_Test}" if $self->use_numbers;
if( defined $name ) {
$name =~ s|#|\\#|g; # # in a name can confuse Test::Harness.
$out .= " - $name";
$result->{name} = $name;
}
else {
$result->{name} = '';
}
if( $self->in_todo ) {
$out .= " # TODO $todo";
$result->{reason} = $todo;
$result->{type} = 'todo';
}
else {
$result->{reason} = '';
$result->{type} = '';
}
# Results are stored zero-based, test numbers are one-based.
$self->{Test_Results}[ $self->{Curr_Test} - 1 ] = $result;
$out .= "\n";
$self->_print($out);
# Failure diagnostics.
unless($test) {
my $msg = $self->in_todo ? "Failed (TODO)" : "Failed";
# When HARNESS_ACTIVE is set, start the diagnostics on a fresh line.
$self->_print_to_fh( $self->_diag_fh, "\n" ) if $ENV{HARNESS_ACTIVE};
my( undef, $file, $line ) = $self->caller;
if( defined $name ) {
$self->diag(qq[ $msg test '$name'\n]);
$self->diag(qq[ at $file line $line.\n]);
}
else {
$self->diag(qq[ $msg test at $file line $line.\n]);
}
}
return $test ? 1 : 0;
}
# For each scalar reference given, if the referenced value is an object that
# overloads the operator named by $type, replace the value with the result
# of calling that overload method. Returns nothing.
sub _unoverload {
    my ( $self, $type, @refs ) = @_;

    $self->_try( sub { require overload; }, die_on_fail => 1 );

    for my $ref (@refs) {
        next unless $self->_is_object($$ref);

        my $converter = overload::Method( $$ref, $type );
        next unless $converter;

        $$ref = $$ref->$converter();
    }

    return;
}
# Return 1 when $thing is a reference on which isa('UNIVERSAL') succeeds
# and is true, otherwise 0. The check runs inside _try so a failing method
# call does not propagate.
sub _is_object {
    my ( $self, $thing ) = @_;

    my $probe = sub { ref $thing && $thing->isa('UNIVERSAL') };

    return 1 if $self->_try($probe);
    return 0;
}
# Apply _unoverload with the stringification operator (q[""]) to every
# scalar reference passed in.
sub _unoverload_str {
    my ( $self, @refs ) = @_;
    return $self->_unoverload( q[""], @refs );
}
# Apply _unoverload with the numification operator ('0+') to every scalar
# reference passed in, then replace any value that _is_dualvar flags with
# its numeric value. Returns nothing.
sub _unoverload_num {
    my ( $self, @refs ) = @_;

    $self->_unoverload( '0+', @refs );

    for my $ref (@refs) {
        if ( $self->_is_dualvar($$ref) ) {
            $$ref = $$ref + 0;
        }
    }

    return;
}
# This is a hack to detect a dualvar such as $!
# Returns 1 when $val is not a reference, its numeric value is non-zero, and
# that numeric value differs from $val as a string; returns 0 otherwise.
sub _is_dualvar {
    my( $self, $val ) = @_;

    # Objects are not dualvars.
    return 0 if ref $val;

    no warnings 'numeric';
    my $numval = $val + 0;

    # The parentheses are essential: "return A and B ? 1 : 0" parses as
    # "(return A) and ...", which would report every non-zero number as a
    # dualvar.
    return ( $numval != 0 and $numval ne $val ) ? 1 : 0;
}
=item B<is_eq>
$Test->is_eq($got, $expected, $name);
Like Test::More's C<is()>. Checks if C<$got eq $expected>. This is the
string version.
=item B<is_num>
$Test->is_num($got, $expected, $name);
Like Test::More's C<is()>. Checks if C<$got == $expected>. This is the
numeric version.
=cut
# String equality test. When both values are defined the comparison is
# delegated to cmp_ok with 'eq'; otherwise the test passes only if both are
# undefined, and a got/expected diagnostic is printed on failure.
sub is_eq {
    my ( $self, $got, $expect, $name ) = @_;
    local $Level = $Level + 1;

    $self->_unoverload_str( \$got, \$expect );

    if ( defined $got && defined $expect ) {
        return $self->cmp_ok( $got, 'eq', $expect, $name );
    }

    # undef only matches undef and nothing else
    my $test = !defined $got && !defined $expect;

    $self->ok( $test, $name );
    if ( !$test ) {
        $self->_is_diag( $got, 'eq', $expect );
    }
    return $test;
}
# Numeric equality test. When both values are defined the comparison is
# delegated to cmp_ok with '=='; otherwise the test passes only if both are
# undefined, and a got/expected diagnostic is printed on failure.
sub is_num {
    my ( $self, $got, $expect, $name ) = @_;
    local $Level = $Level + 1;

    $self->_unoverload_num( \$got, \$expect );

    if ( defined $got && defined $expect ) {
        return $self->cmp_ok( $got, '==', $expect, $name );
    }

    # undef only matches undef and nothing else
    my $test = !defined $got && !defined $expect;

    $self->ok( $test, $name );
    if ( !$test ) {
        $self->_is_diag( $got, '==', $expect );
    }
    return $test;
}
# Rewrite the value behind the scalar reference $val for display in a
# diagnostic: undefined becomes the word 'undef', values compared with
# 'eq'/'ne' are wrapped in single quotes, anything else is passed through
# _unoverload_num. Returns nothing.
sub _diag_fmt {
    my ( $self, $type, $val ) = @_;

    if ( !defined $$val ) {
        $$val = 'undef';
    }
    elsif ( $type eq 'eq' or $type eq 'ne' ) {

        # quote and force string context
        $$val = "'$$val'";
    }
    else {

        # force numeric context
        $self->_unoverload_num($val);
    }

    return;
}
# Print a "got / expected" diagnostic, formatting both values with
# _diag_fmt for the given comparison $type. Returns diag()'s result.
sub _is_diag {
    my ( $self, $got, $type, $expect ) = @_;

    $self->_diag_fmt( $type, \$got );
    $self->_diag_fmt( $type, \$expect );

    local $Level = $Level + 1;
    return $self->diag(<<"DIAGNOSTIC");
got: $got
expected: $expect
DIAGNOSTIC
}
# Print a "got / expected: anything else" diagnostic, formatting the value
# with _diag_fmt for the given comparison $type. Returns diag()'s result.
sub _isnt_diag {
    my ( $self, $got, $type ) = @_;

    my $got_ref = \$got;
    $self->_diag_fmt( $type, $got_ref );

    local $Level = $Level + 1;
    return $self->diag(<<"DIAGNOSTIC");
got: $got
expected: anything else
DIAGNOSTIC
}
=item B<isnt_eq>
$Test->isnt_eq($got, $dont_expect, $name);
Like Test::More's C<isnt()>. Checks if C<$got ne $dont_expect>. This is
the string version.
=item B<isnt_num>
$Test->isnt_num($got, $dont_expect, $name);
Like Test::More's C<isnt()>. Checks if C<$got != $dont_expect>. This is
the numeric version.
=cut
# String inequality test. When both values are defined the comparison is
# delegated to cmp_ok with 'ne'; otherwise the test passes as long as at
# least one value is defined, and a diagnostic is printed on failure.
sub isnt_eq {
    my ( $self, $got, $dont_expect, $name ) = @_;
    local $Level = $Level + 1;

    if ( defined $got && defined $dont_expect ) {
        return $self->cmp_ok( $got, 'ne', $dont_expect, $name );
    }

    # undef only matches undef and nothing else
    my $test = defined $got || defined $dont_expect;

    $self->ok( $test, $name );
    if ( !$test ) {
        $self->_isnt_diag( $got, 'ne' );
    }
    return $test;
}
# Numeric inequality test. When both values are defined the comparison is
# delegated to cmp_ok with '!='; otherwise the test passes as long as at
# least one value is defined, and a diagnostic is printed on failure.
sub isnt_num {
    my ( $self, $got, $dont_expect, $name ) = @_;
    local $Level = $Level + 1;

    if ( defined $got && defined $dont_expect ) {
        return $self->cmp_ok( $got, '!=', $dont_expect, $name );
    }

    # undef only matches undef and nothing else
    my $test = defined $got || defined $dont_expect;

    $self->ok( $test, $name );
    if ( !$test ) {
        $self->_isnt_diag( $got, '!=' );
    }
    return $test;
}
=item B<like>
$Test->like($this, qr/$regex/, $name);
$Test->like($this, '/$regex/', $name);
Like Test::More's C<like()>. Checks if $this matches the given C<$regex>.
You'll want to avoid C<qr//> if you want your tests to work before 5.005.
=item B<unlike>
$Test->unlike($this, qr/$regex/, $name);
$Test->unlike($this, '/$regex/', $name);
Like Test::More's C<unlike()>. Checks if $this B<does not match> the
given C<$regex>.
=cut
# Pass when $this matches $regex; the work is done by _regex_ok with '=~'.
sub like {
    my ( $self, $this, $regex, $name ) = @_;

    local $Level = $Level + 1;
    my $passed = $self->_regex_ok( $this, $regex, '=~', $name );
    return $passed;
}
# Pass when $this does not match $regex; the work is done by _regex_ok
# with '!~'.
sub unlike {
    my ( $self, $this, $regex, $name ) = @_;

    local $Level = $Level + 1;
    my $passed = $self->_regex_ok( $this, $regex, '!~', $name );
    return $passed;
}
=item B<cmp_ok>
$Test->cmp_ok($this, $type, $that, $name);
Works just like Test::More's C<cmp_ok()>.
$Test->cmp_ok($big_num, '!=', $other_big_num);
=cut
# Lookup table of the comparison operators that are treated as numeric.
my %numeric_cmps = map { ( $_, 1 ) } ( "<", "<=", ">", ">=", "==", "!=", "<=>" )
;
# Compare $got and $expect with the operator named by $type and report the
# outcome through ok().
#   $got, $expect - the two operands
#   $type         - operator text, spliced into a string eval
#   $name         - optional test name
# Returns the result of ok(). On failure a diagnostic suited to the operator
# is printed; if the eval itself raised an error, that is printed as well.
# NOTE(review): $type is interpolated into evaluated code without any
# validation here, so it must never come from untrusted input.
sub cmp_ok {
my( $self, $got, $type, $expect, $name ) = @_;
my $test;
my $error;
{
## no critic (BuiltinFunctions::ProhibitStringyEval)
local( $@, $!, $SIG{__DIE__} ); # isolate eval
my($pack, $file, $line) = $self->caller();
# The #line directive makes errors and warnings from the evaluated
# comparison mention the caller's file and line.
$test = eval qq[
#line 1 "cmp_ok [from $file line $line]"
\$got $type \$expect;
];
$error = $@;
}
local $Level = $Level + 1;
my $ok = $self->ok( $test, $name );
# Treat overloaded objects as numbers if we're asked to do a
# numeric comparison.
my $unoverload
= $numeric_cmps{$type}
? '_unoverload_num'
: '_unoverload_str';
$self->diag(<<"END") if $error;
An error occurred while using $type:
------------------------------------
$error
------------------------------------
END
unless($ok) {
# Flatten overloaded operands before they are shown in the diagnostic.
$self->$unoverload( \$got, \$expect );
if( $type =~ /^(eq|==)$/ ) {
$self->_is_diag( $got, $type, $expect );
}
elsif( $type =~ /^(ne|!=)$/ ) {
$self->_isnt_diag( $got, $type );
}
else {
$self->_cmp_diag( $got, $type, $expect );
}
}
return $ok;
}
# Print a three-line diagnostic showing $got, the operator, and $expect.
# Defined operands are shown in single quotes, undefined ones as the word
# undef. Returns diag()'s result.
sub _cmp_diag {
    my ( $self, $got, $type, $expect ) = @_;

    if   ( defined $got ) { $got = "'$got'"; }
    else                  { $got = 'undef'; }

    if   ( defined $expect ) { $expect = "'$expect'"; }
    else                     { $expect = 'undef'; }

    local $Level = $Level + 1;
    return $self->diag(<<"DIAGNOSTIC");
$got
$type
$expect
DIAGNOSTIC
}
# Build a "#line LINE FILE" directive from $self->caller(1). Returns the
# empty string when the file or the line is not available.
sub _caller_context {
    my ($self) = @_;

    my ( $pack, $file, $line ) = $self->caller(1);

    return '' unless defined $file and defined $line;
    return "#line $line $file\n";
}
=back
=head2 Other Testing Methods
These are methods which are used in the course of writing a test but are not
themselves tests.
=over 4
=item B<BAIL_OUT>
$Test->BAIL_OUT($reason);
Indicates to the Test::Harness that things are going so badly all
testing should terminate. This includes running any additional test
scripts.
It will exit with 255.
=cut
# Flag the run as bailed out, print "Bail out! $reason" on the test output,
# and terminate the process with exit status 255.
sub BAIL_OUT {
    my ( $self, $reason ) = @_;

    $self->{Bailed_Out} = 1;

    my $announcement = "Bail out! $reason";
    $self->_print($announcement);

    exit 255;
}
=for deprecated
BAIL_OUT() used to be BAILOUT()
=cut
# Keep the old name BAILOUT() working as an alias for BAIL_OUT().
*BAILOUT = \&BAIL_OUT;
=item B<skip>
$Test->skip;
$Test->skip($why);
Skips the current test, reporting C<$why>.
=cut
# Record the next test as skipped and print "ok [N] # skip [$why]".
# A false $why is treated as the empty string. Returns 1.
sub skip {
    my ( $self, $why ) = @_;

    $why = '' unless $why;
    $self->_unoverload_str( \$why );

    lock( $self->{Curr_Test} );
    $self->{Curr_Test}++;

    my %record = (
        'ok'      => 1,
        actual_ok => 1,
        name      => '',
        type      => 'skip',
        reason    => $why,
    );
    $self->{Test_Results}[ $self->{Curr_Test} - 1 ] = &share( {%record} );

    my @pieces = ("ok");
    push @pieces, " $self->{Curr_Test}" if $self->use_numbers;
    push @pieces, " # skip";
    push @pieces, " $why" if length $why;
    push @pieces, "\n";

    $self->_print( join '', @pieces );

    return 1;
}
=item B<todo_skip>
$Test->todo_skip;
$Test->todo_skip($why);
Like C<skip()>, only it will declare the test as failing and TODO. Similar
to
print "not ok $tnum # TODO $why\n";
=cut
# Record the next test as a skipped TODO (reported ok, actually failing)
# and print "not ok [N] # TODO & SKIP $why". A false $why is treated as the
# empty string. Returns 1.
sub todo_skip {
    my ( $self, $why ) = @_;

    $why = '' unless $why;

    lock( $self->{Curr_Test} );
    $self->{Curr_Test}++;

    my %record = (
        'ok'      => 1,
        actual_ok => 0,
        name      => '',
        type      => 'todo_skip',
        reason    => $why,
    );
    $self->{Test_Results}[ $self->{Curr_Test} - 1 ] = &share( {%record} );

    my @pieces = ("not ok");
    push @pieces, " $self->{Curr_Test}" if $self->use_numbers;
    push @pieces, " # TODO & SKIP $why\n";

    $self->_print( join '', @pieces );

    return 1;
}
=begin _unimplemented
=item B<skip_rest>
$Test->skip_rest;
$Test->skip_rest($reason);
Like C<skip()>, only it skips all the rest of the tests you plan to run
and terminates the test.
If you're running under C<no_plan>, it skips once and terminates the
test.
=end _unimplemented
=back
=head2 Test building utility methods
These methods are useful when writing your own test methods.
=over 4
=item B<maybe_regex>
$Test->maybe_regex(qr/$regex/);
$Test->maybe_regex('/$regex/');
Convenience method for building testing functions that take regular
expressions as arguments, but need to work before perl 5.005.
Takes a quoted regular expression produced by C<qr//>, or a string
representing a regular expression.
Returns a Perl value which may be used instead of the corresponding
regular expression, or C<undef> if its argument is not recognised.
For example, a version of C<like()>, sans the useful diagnostic messages,
could be written as:
sub laconic_like {
my ($self, $this, $regex, $name) = @_;
my $usable_regex = $self->maybe_regex($regex);
die "expecting regex, found '$regex'\n"
unless $usable_regex;
$self->ok($this =~ m/$usable_regex/, $name);
}
=cut
# Turn $regex into something usable inside a pattern match.
#   - a compiled regex (as judged by _is_qr) is returned unchanged
#   - a string of the form '/foo/opts' or 'mXfooXopts' (X being a character
#     that is neither a word character nor whitespace) is returned as the
#     inner pattern, prefixed with "(?opts)" when options were given
# Returns undef when $regex is undefined or matches none of these forms.
sub maybe_regex {
my( $self, $regex ) = @_;
my $usable_regex = undef;
return $usable_regex unless defined $regex;
my( $re, $opts );
# Check for qr/foo/
if( _is_qr($regex) ) {
$usable_regex = $regex;
}
# Check for '/foo/' or 'm,foo,'
# Each list assignment is used as a condition: it is true only when
# its pattern matched, and it fills in $re and $opts as a side effect.
elsif(( $re, $opts ) = $regex =~ m{^ /(.*)/ (\w*) $ }sx
or
( undef, $re, $opts ) = $regex =~ m,^ m([^\w\s]) (.+) \1 (\w*) $,sx
)
{
# Trailing option letters are folded into the pattern as an inline
# modifier group.
$usable_regex = length $opts ? "(?$opts)$re" : $re;
}
return $usable_regex;
}
# Tell whether the argument is a compiled regular expression.
sub _is_qr {
    my ($candidate) = @_;

    # is_regexp() checks for regexes in a robust manner, say if they're
    # blessed.
    if ( defined &re::is_regexp ) {
        return re::is_regexp($candidate);
    }

    # Otherwise rely on the reference type alone.
    return ref $candidate eq 'Regexp';
}
# Shared implementation behind like() and unlike().
#   $this  - value to match
#   $regex - a compiled regex or a regex-looking string (see maybe_regex)
#   $cmp   - '=~' to require a match, '!~' to require no match
#   $name  - optional test name
# Returns the result of ok(). An unusable $regex yields a failing test with
# an explanatory diagnostic.
sub _regex_ok {
my( $self, $this, $regex, $cmp, $name ) = @_;
my $ok = 0;
my $usable_regex = $self->maybe_regex($regex);
unless( defined $usable_regex ) {
local $Level = $Level + 1;
$ok = $self->ok( 0, $name );
$self->diag(" '$regex' doesn't look much like a regex to me.");
return $ok;
}
{
## no critic (BuiltinFunctions::ProhibitStringyEval)
my $test;
# $code is the "#line" directive built by _caller_context (or an empty
# string); it is placed in front of the evaluated match.
my $code = $self->_caller_context;
local( $@, $!, $SIG{__DIE__} ); # isolate eval
# Yes, it has to look like this or 5.4.5 won't see the #line
# directive.
# Don't ask me, man, I just work here.
$test = eval "
$code" . q{$test = $this =~ /$usable_regex/ ? 1 : 0};
# For '!~' the sense of the match is inverted.
$test = !$test if $cmp eq '!~';
local $Level = $Level + 1;
$ok = $self->ok( $test, $name );
}
unless($ok) {
# Describe the value and how it related to the pattern.
$this = defined $this ? "'$this'" : 'undef';
my $match = $cmp eq '=~' ? "doesn't match" : "matches";
local $Level = $Level + 1;
$self->diag( sprintf <<'DIAGNOSTIC', $this, $match, $regex );
%s
%13s '%s'
DIAGNOSTIC
}
return $ok;
}
# I'm not ready to publish this. It doesn't deal with array return
# values from the code or context.
=begin private
=item B<_try>
my $return_from_code = $Test->_try(sub { code });
my($return_from_code, $error) = $Test->_try(sub { code });
Works like eval BLOCK except it ensures it has no effect on the rest
of the test (ie. C<$@> is not set) nor is affected by outside
interference (ie. C<$SIG{__DIE__}>) and works around some quirks in older
Perls.
C<$error> is what would normally be in C<$@>.
It is suggested you use this in place of eval BLOCK.
=cut
# Run $code inside an eval with $!, $@ and $SIG{__DIE__} localized.
#   %opts: die_on_fail => true  rethrows any error the code raised
# Returns the code's (scalar) result; in list context returns the pair
# ( result, error ).
sub _try {
    my ( $self, $code, %opts ) = @_;

    my ( $return, $error );
    {
        local $!;               # eval can mess up $!
        local $@;               # don't set $@ in the test
        local $SIG{__DIE__};    # don't trip an outside DIE handler.

        $return = eval { $code->() };
        $error  = $@;
    }

    if ( $error and $opts{die_on_fail} ) {
        die $error;
    }

    return ( $return, $error ) if wantarray;
    return $return;
}
=end private
=item B<is_fh>
my $is_fh = $Test->is_fh($thing);
Determines if the given C<$thing> can be used as a filehandle.
=cut
# Decide whether $maybe_fh can serve as a filehandle: a glob, a reference to
# a glob, an IO::Handle object, or a variable tied to a class that
# implements TIEHANDLE. Returns 0 for undef and 1 for globs; otherwise the
# (possibly false) result of the isa/can probes.
sub is_fh {
    my ( $self, $maybe_fh ) = @_;

    return 0 unless defined $maybe_fh;

    if ( ref $maybe_fh eq 'GLOB' ) {    # its a glob ref
        return 1;
    }
    if ( ref \$maybe_fh eq 'GLOB' ) {    # its a glob
        return 1;
    }

    my $is_io_handle = eval { $maybe_fh->isa("IO::Handle") };
    return $is_io_handle if $is_io_handle;

    # 5.5.4's tied() and can() doesn't like getting undef
    return eval { ( tied($maybe_fh) || '' )->can('TIEHANDLE') };
}
=back
=head2 Test style
=over 4
=item B<level>
$Test->level($how_high);
How far up the call stack should C<$Test> look when reporting where the
test failed.
Defaults to 1.
Setting C<$Test::Builder::Level> overrides. This is typically useful
localized:
sub my_ok {
my $test = shift;
local $Test::Builder::Level = $Test::Builder::Level + 1;
$TB->ok($test);
}
To be polite to other functions wrapping your own you usually want to increment
C<$Level> rather than set it to a constant.
=cut
# Get the package-wide $Level, first assigning $level to it when given.
sub level {
    my ( $self, $level ) = @_;

    $Level = $level if defined $level;

    return $Level;
}
=item B<use_numbers>
$Test->use_numbers($on_or_off);
Whether or not the test should output numbers. That is, this if true:
ok 1
ok 2
ok 3
or this if false
ok
ok
ok
Most useful when you can't depend on the test output order, such as
when threads or forking is involved.
Defaults to on.
=cut
# Get the Use_Nums flag, first storing $use_nums when it is defined.
sub use_numbers {
    my ( $self, $use_nums ) = @_;

    $self->{Use_Nums} = $use_nums if defined $use_nums;

    return $self->{Use_Nums};
}
=item B<no_diag>
$Test->no_diag($no_diag);
If set true no diagnostics will be printed. This includes calls to
C<diag()>.
=item B<no_ending>
$Test->no_ending($no_ending);
Normally, Test::Builder does some extra diagnostics when the test
ends. It also changes the exit code as described below.
If this is true, none of that will be done.
=item B<no_header>
$Test->no_header($no_header);
If set to true, no "1..N" header will be printed.
=cut
# Generate the accessors no_header(), no_ending() and no_diag(). Each one
# stores its argument under the matching hash key when the argument is
# defined, and returns the stored value.
for my $attribute (qw(No_Header No_Ending No_Diag)) {
    my $accessor = sub {
        my ( $self, $no ) = @_;
        $self->{$attribute} = $no if defined $no;
        return $self->{$attribute};
    };

    my $method    = lc $attribute;
    my $full_name = __PACKAGE__ . '::' . $method;

    no strict 'refs';    ## no critic
    *{$full_name} = $accessor;
}
=back
=head2 Output
Controlling where the test output goes.
It's ok for your test to change where STDOUT and STDERR point to,
Test::Builder's default output settings will not be affected.
=over 4
=item B<diag>
$Test->diag(@msgs);
Prints out the given C<@msgs>. Like C<print>, arguments are simply
appended together.
Normally, it uses the C<failure_output()> handle, but if this is for a
TODO test, the C<todo_output()> handle is used.
Output will be indented and marked with a # so as not to interfere
with test output. A newline will be put on the end if there isn't one
already.
We encourage using this rather than calling print directly.
Returns false. Why? Because C<diag()> is often used in conjunction with
a failing test (C<ok() || diag()>) it "passes through" the failure.
return ok(...) || diag(...);
=for blame transfer
Mark Fowler <mark@twoshortplanks.com>
=cut
# Print @msgs as a comment on the handle chosen by _diag_fh and hand back
# whatever _print_comment returns.
sub diag {
    my ( $self, @msgs ) = @_;
    return $self->_print_comment( $self->_diag_fh, @msgs );
}
=item B<note>
$Test->note(@msgs);
Like C<diag()>, but it prints to the C<output()> handle so it will not
normally be seen by the user except in verbose mode.
=cut
# Print @msgs as a comment on the output() handle and hand back whatever
# _print_comment returns.
sub note {
    my ( $self, @msgs ) = @_;
    return $self->_print_comment( $self->output, @msgs );
}
# Choose the handle for diagnostics: todo_output while in a TODO,
# failure_output otherwise.
sub _diag_fh {
    my ($self) = @_;

    local $Level = $Level + 1;

    if ( $self->in_todo ) {
        return $self->todo_output;
    }
    return $self->failure_output;
}
# Write @msgs to $fh as a comment: the pieces are concatenated (undefined
# ones shown as 'undef') and prefixed with "# ". Nothing is printed when
# diagnostics are disabled, when there are no messages, or when perl is only
# compiling. Returns 0 after printing, nothing otherwise.
sub _print_comment {
    my ( $self, $fh, @msgs ) = @_;

    return if $self->no_diag;
    return if !@msgs;

    # Prevent printing headers when compiling (i.e. -c)
    return if $^C;

    # Smash args together like print does.
    # Convert undef to 'undef' so its readable.
    my @printable = map { defined($_) ? $_ : 'undef' } @msgs;

    # Escape the beginning, _print will take care of the rest.
    my $msg = '# ' . join( '', @printable );

    local $Level = $Level + 1;
    $self->_print_to_fh( $fh, $msg );

    return 0;
}
=item B<explain>
my @dump = $Test->explain(@msgs);
Will dump the contents of any references in a human readable format.
Handy for things like...
is_deeply($have, $want) || diag explain $have;
or
is_deeply($have, $want) || note explain $have;
=cut
# Return the arguments with every reference replaced by its Data::Dumper
# rendering (Indent 1, Terse, sorted keys when supported); non-references
# pass through unchanged.
sub explain {
    my ( $self, @things ) = @_;

    my @explained;
    for my $thing (@things) {
        if ( !ref $thing ) {
            push @explained, $thing;
            next;
        }

        $self->_try( sub { require Data::Dumper }, die_on_fail => 1 );

        my $dumper = Data::Dumper->new( [$thing] );
        $dumper->Indent(1)->Terse(1);
        $dumper->Sortkeys(1) if $dumper->can("Sortkeys");

        push @explained, $dumper->Dump;
    }

    return @explained;
}
=begin _private
=item B<_print>
$Test->_print(@msgs);
Prints to the C<output()> filehandle.
=end _private
=cut
# Print @msgs to the output() handle by way of _print_to_fh().
sub _print {
    my( $self, @msgs ) = @_;

    my $out = $self->output;

    return $self->_print_to_fh( $out, @msgs );
}
# Write @msgs, joined into a single string, to $fh.  Every line after the
# first gets a "# " prefix and the output always ends in a newline.
# Returns the result of print, or nothing when perl is only compiling.
sub _print_to_fh {
    my $self = shift;
    my $fh   = shift;

    # Prevent printing headers when only compiling.  Mostly for when
    # tests are deparsed with B::Deparse.
    return if $^C;

    my $text = join '', @_;

    # Shield the print below from separators set by the caller.
    local $\ = undef;
    local $" = ' ';
    local $, = '';

    # Escape each line after the first with a # so we don't confuse
    # Test::Harness.
    $text =~ s{\n(?!\z)}{\n# }sg;

    # Stick a newline on the end if it needs it.
    if( $text !~ /\n\z/ ) {
        $text .= "\n";
    }

    return print {$fh} $text;
}
=item B<output>
=item B<failure_output>
=item B<todo_output>
my $filehandle = $Test->output;
$Test->output($filehandle);
$Test->output($filename);
$Test->output(\$scalar);
These methods control where Test::Builder will print its output.
They take either an open C<$filehandle>, a C<$filename> to open and write to
or a C<$scalar> reference to append to. It will always return a C<$filehandle>.
B<output> is where normal "ok/not ok" test output goes.
Defaults to STDOUT.
B<failure_output> is where diagnostic output on test failures and
C<diag()> goes. It is normally not read by Test::Harness and instead is
displayed to the user.
Defaults to STDERR.
C<todo_output> is used instead of C<failure_output()> for the
diagnostics of a failing TODO test. These will not be seen by the
user.
Defaults to STDOUT.
=cut
# Get, and optionally set, the handle stored in Out_FH.  A defined argument
# is first turned into a filehandle by _new_fh().
sub output {
    my $self = shift;
    my($fh)  = @_;

    $self->{Out_FH} = $self->_new_fh($fh) if defined $fh;

    return $self->{Out_FH};
}
# Get, and optionally set, the handle stored in Fail_FH.  A defined argument
# is first turned into a filehandle by _new_fh().
sub failure_output {
    my $self = shift;
    my($fh)  = @_;

    $self->{Fail_FH} = $self->_new_fh($fh) if defined $fh;

    return $self->{Fail_FH};
}
# Get, and optionally set, the handle stored in Todo_FH.  A defined argument
# is first turned into a filehandle by _new_fh().
sub todo_output {
    my $self = shift;
    my($fh)  = @_;

    $self->{Todo_FH} = $self->_new_fh($fh) if defined $fh;

    return $self->{Todo_FH};
}
# Turn $file_or_fh into a filehandle:
#   - something is_fh() accepts is returned untouched,
#   - a SCALAR reference is opened for appending,
#   - anything else is taken as a filename, opened for writing and
#     set to autoflush.
sub _new_fh {
    my( $self, $file_or_fh ) = @_;

    return $file_or_fh if $self->is_fh($file_or_fh);

    my $fh;

    if( ref $file_or_fh ne 'SCALAR' ) {
        open $fh, ">", $file_or_fh
          or $self->croak("Can't open test output log $file_or_fh: $!");
        _autoflush($fh);
        return $fh;
    }

    if( $] >= 5.008 ) {
        # Scalar refs as filehandles was added in 5.8.
        open $fh, ">>", $file_or_fh
          or $self->croak("Can't open scalar ref $file_or_fh: $!");
    }
    else {
        # Emulate scalar ref filehandles with a tie.
        $fh = Test::Builder::IO::Scalar->new($file_or_fh)
          or $self->croak("Can't tie scalar ref $file_or_fh");
    }

    return $fh;
}
# Turn on autoflush ($|) for $fh, leaving the currently selected handle as
# it was.  Returns nothing.
sub _autoflush {
    my $fh = shift;

    # The inner select() switches to $fh and yields the previous handle,
    # $| is set while $fh is selected, and the outer select() switches back.
    select( ( select($fh), $| = 1 )[0] );

    return;
}
# Handles shared by the subs below; _open_testhandles() opens them as
# duplicates of STDOUT and STDERR.
my( $Testout, $Testerr );

# Open the duplicate handles, make them and the real STDOUT/STDERR
# unbuffered, and point the outputs at their defaults.
sub _dup_stdhandles {
    my($self) = @_;

    $self->_open_testhandles;

    # Set everything to unbuffered else plain prints to STDOUT will
    # come out in the wrong order from our own prints.
    for my $fh ( $Testout, \*STDOUT, $Testerr, \*STDERR ) {
        _autoflush($fh);
    }

    $self->reset_outputs;

    return;
}
# Duplicate STDOUT and STDERR into $Testout and $Testerr.  The
# Opened_Testhandles flag makes every call after the first a no-op.
sub _open_testhandles {
    my($self) = @_;

    if( !$self->{Opened_Testhandles} ) {
        # We dup STDOUT and STDERR so people can change them in their
        # test suites while still getting normal test output.
        open( $Testout, ">&STDOUT" ) or die "Can't dup STDOUT: $!";
        open( $Testerr, ">&STDERR" ) or die "Can't dup STDERR: $!";

        # Copying the IO layers onto the duplicates is currently disabled:
        #    $self->_copy_io_layers( \*STDOUT, $Testout );
        #    $self->_copy_io_layers( \*STDERR, $Testerr );

        $self->{Opened_Testhandles} = 1;
    }

    return;
}
# Apply the PerlIO layers reported for $src to $dst.  The work is handed to
# _try() as a code reference; this sub itself returns nothing.
sub _copy_io_layers {
    my( $self, $src, $dst ) = @_;

    my $copy_layers = sub {
        require PerlIO;
        my @layers = PerlIO::get_layers($src);

        binmode $dst, join( " ", map { ":$_" } @layers ) if @layers;
    };

    $self->_try($copy_layers);

    return;
}
=item reset_outputs
$tb->reset_outputs;
Resets all the output filehandles back to their defaults.
=cut
# Point output() and todo_output() at $Testout, and failure_output() at
# $Testerr.
sub reset_outputs {
    my($self) = @_;

    my @defaults = (
        output         => $Testout,
        failure_output => $Testerr,
        todo_output    => $Testout,
    );

    # Walk the setter/handle pairs in the order listed above.
    while( my( $setter, $fh ) = splice @defaults, 0, 2 ) {
        $self->$setter($fh);
    }

    return;
}
=item carp
$tb->carp(@message);
Warns with C<@message> but the message will appear to come from the
point where the original test function was called (C<< $tb->caller >>).
=item croak
$tb->croak(@message);
Dies with C<@message> but the message will appear to come from the
point where the original test function was called (C<< $tb->caller >>).
=cut
# Build "<message> at FILE line LINE.\n", taking FILE and LINE from
# $self->caller with $Level raised by one for this sub's own frame.
sub _message_at_caller {
    my( $self, @parts ) = @_;

    local $Level = $Level + 1;
    my( undef, $file, $line ) = $self->caller;

    return join( "", @parts, " at $file line $line.\n" );
}
# Warn with the text produced by _message_at_caller().
sub carp {
    my( $self, @message ) = @_;

    my $text = $self->_message_at_caller(@message);

    return warn $text;
}
# Die with the text produced by _message_at_caller().
sub croak {
    my( $self, @message ) = @_;

    my $text = $self->_message_at_caller(@message);

    return die $text;
}
=back
=head2 Test Status and Info
=over 4
=item B<current_test>
my $curr_test = $Test->current_test;
$Test->current_test($num);
Gets/sets the current test number we're on. You usually shouldn't
have to set this.
If set forward, the details of the missing tests are filled in as 'unknown'.
If set backward, the details of the intervening tests are deleted. You
can erase history if you really want to.
=cut
# Get or set the current test number (Curr_Test).
#
# Setting it also resizes Test_Results to match: moving forward fills the
# gap with placeholder results of type 'unknown', moving backward drops the
# results beyond the new number.
sub current_test {
    my $self = shift;
    my($num) = @_;

    lock( $self->{Curr_Test} );

    return $self->{Curr_Test} unless defined $num;

    $self->{Curr_Test} = $num;

    my $results  = $self->{Test_Results};
    my $recorded = @$results;

    if( $num > $recorded ) {
        # If the test counter is being pushed forward fill in the details.
        for my $idx ( $recorded .. $num - 1 ) {
            $results->[$idx] = &share(
                {
                    'ok'      => 1,
                    actual_ok => undef,
                    reason    => 'incrementing test number',
                    type      => 'unknown',
                    name      => undef
                }
            );
        }
    }
    elsif( $num < $recorded ) {
        # If backward, wipe history.  Its their funeral.
        $#{$results} = $num - 1;
    }

    return $self->{Curr_Test};
}
=item B<summary>
my @tests = $Test->summary;
A simple summary of the tests so far. True for pass, false for fail.
This is a logical pass/fail, so todos are passes.
Of course, test #1 is $tests[0], etc...
=cut
# Return the 'ok' field of every recorded test result, in test order.
sub summary {
    my($self) = @_;

    my @verdicts;
    push @verdicts, $_->{'ok'} for @{ $self->{Test_Results} };

    return @verdicts;
}
=item B<details>
my @tests = $Test->details;
Like C<summary()>, but with a lot more detail.
$tests[$test_num - 1] =
{ 'ok' => is the test considered a pass?
actual_ok => did it literally say 'ok'?
name => name of the test (if any)
type => type of test (if any, see below).
reason => reason for the above (if any)
};
'ok' is true if Test::Harness will consider the test to be a pass.
'actual_ok' is a reflection of whether or not the test literally
printed 'ok' or 'not ok'. This is for examining the result of 'todo'
tests.
'name' is the name of the test.
'type' indicates if it was a special test. Normal tests have a type
of ''. Type can be one of the following:
skip see skip()
todo see todo()
todo_skip see todo_skip()
unknown see below
Sometimes the Test::Builder test counter is incremented without it
printing any test output, for example, when C<current_test()> is changed.
In these cases, Test::Builder doesn't know the result of the test, so
its type is 'unknown'. The details for these tests are filled in.
They are considered ok, but the name and actual_ok is left C<undef>.
For example "not ok 23 - hole count # TODO insufficient donuts" would
result in this structure:
$tests[22] = # 23 - 1, since arrays start from 0.
{ ok => 1, # logically, the test passed since its todo
actual_ok => 0, # in absolute terms, it failed
name => 'hole count',
type => 'todo',
reason => 'insufficient donuts'
};
=cut
# Return the list of recorded test results (the contents of Test_Results).
sub details {
    my($self) = @_;

    my $results = $self->{Test_Results};

    return @$results;
}
=item B<todo>
my $todo_reason = $Test->todo;
my $todo_reason = $Test->todo($pack);
If the current tests are considered "TODO" it will return the reason,
if any. This reason can come from a C<$TODO> variable or the last call
to C<todo_start()>.
Since a TODO test does not need a reason, this function can return an
empty string even when inside a TODO block. Use C<< $Test->in_todo >>
to determine if you are currently inside a TODO block.
C<todo()> is about finding the right package to look for C<$TODO> in. It's
pretty good at guessing the right package to look at. It first looks for
the caller based on C<$Level + 1>, since C<todo()> is usually called inside
a test function. As a last resort it will use C<exported_to()>.
Sometimes there is some confusion about where todo() should be looking
for the C<$TODO> variable. If you want to be sure, tell it explicitly
what $pack to use.
=cut
# Return the current TODO reason: the one stored in Todo if defined,
# otherwise whatever find_TODO() reports for $pack, otherwise ''.
sub todo {
    my $self  = shift;
    my($pack) = @_;

    my $stored = $self->{Todo};
    return $stored if defined $stored;

    local $Level = $Level + 1;
    my $reason = $self->find_TODO($pack);

    return defined $reason ? $reason : '';
}
=item B<find_TODO>
my $todo_reason = $Test->find_TODO();
my $todo_reason = $Test->find_TODO($pack);
Like C<todo()> but only returns the value of C<$TODO> ignoring
C<todo_start()>.
=cut
# Return the value of $TODO in package $pack.  Without $pack, the package
# is taken from $self->caller(1), falling back to exported_to().  Returns
# nothing when no package can be determined.
sub find_TODO {
    my $self = shift;
    my $pack = shift;

    $pack ||= $self->caller(1) || $self->exported_to;
    return unless $pack;

    # The variable is looked up by name, which needs a symbolic reference.
    no strict 'refs';    ## no critic
    my $todo_var = $pack . '::TODO';

    return ${$todo_var};
}
=item B<in_todo>
my $in_todo = $Test->in_todo;
Returns true if the test is currently inside a TODO block.
=cut
# Return 1 when Todo is defined or find_TODO() reports a true value,
# otherwise 0.
sub in_todo {
    my($self) = @_;

    local $Level = $Level + 1;

    return 1 if defined $self->{Todo};

    return $self->find_TODO ? 1 : 0;
}
=item B<todo_start>
$Test->todo_start();
$Test->todo_start($message);
This method allows you to declare all subsequent tests as TODO tests, up until
the C<todo_end> method has been called.
The C<TODO:> and C<$TODO> syntax is generally pretty good about figuring out
whether or not we're in a TODO test. However, often we find that this is not
possible to determine (such as when we want to use C<$TODO> but
the tests are being executed in other packages which can't be inferred
beforehand).
Note that you can use this to nest "todo" tests
$Test->todo_start('working on this');
# lots of code
$Test->todo_start('working on that');
# more code
$Test->todo_end;
$Test->todo_end;
This is generally not recommended, but large testing systems often have weird
internal needs.
We've tried to make this also work with the TODO: syntax, but it's not
guaranteed and its use is also discouraged:
TODO: {
local $TODO = 'We have work to do!';
$Test->todo_start('working on this');
# lots of code
$Test->todo_start('working on that');
# more code
$Test->todo_end;
$Test->todo_end;
}
Pick one style or another of "TODO" to be on the safe side.
=cut
# Begin a TODO section with the given reason ('' when none is passed).
# If a TODO is already in effect, its reason is saved on Todo_Stack first.
sub todo_start {
    my( $self, @args ) = @_;

    my $message = @args ? $args[0] : '';

    $self->{Start_Todo}++;

    push @{ $self->{Todo_Stack} }, $self->todo if $self->in_todo;

    $self->{Todo} = $message;

    return;
}
=item C<todo_end>
$Test->todo_end;
Stops running tests as "TODO" tests. This method is fatal if called without a
preceding C<todo_start> method call.
=cut
# End the innermost todo_start() section.  Croaks when no section is open.
# The reason of an enclosing section, if one was stacked, becomes current
# again; otherwise Todo is removed.
sub todo_end {
    my($self) = @_;

    $self->croak('todo_end() called without todo_start()')
      unless $self->{Start_Todo};

    $self->{Start_Todo}--;

    my $has_outer = $self->{Start_Todo} && @{ $self->{Todo_Stack} };
    if( !$has_outer ) {
        delete $self->{Todo};
        return;
    }

    $self->{Todo} = pop @{ $self->{Todo_Stack} };

    return;
}
=item B<caller>
my $package = $Test->caller;
my($pack, $file, $line) = $Test->caller;
my($pack, $file, $line) = $Test->caller($height);
Like the normal C<caller()>, except it reports according to your C<level()>.
C<$height> will be added to the C<level()>.
If C<caller()> winds up off the top of the stack it reports the highest context.
=cut
# Like the builtin caller(), looked up level() + $height + 1 frames up.
# If that is beyond the top of the stack, the closest existing frame below
# it is used instead.  Returns the whole frame in list context and only
# its first element (the package) in scalar context.
sub caller {    ## no critic (Subroutines::ProhibitBuiltinHomonyms)
    my $self   = shift;
    my $height = shift || 0;

    my $level = $self->level + $height + 1;

    my @frame = CORE::caller($level);
    while( !@frame ) {
        $level--;
        @frame = CORE::caller($level);
    }

    return wantarray ? @frame : $frame[0];
}
=back
=cut
=begin _private
=over 4
=item B<_sanity_check>
$self->_sanity_check();
Runs a bunch of end of test sanity checks to make sure reality came
through ok. If anything is wrong it will die with a fairly friendly
error message.
=cut
#'#
# End-of-test sanity checks.  Each condition that must never be true is
# passed to _whoa() together with the message to report:
#   - the test counter is negative,
#   - the test counter differs from the number of recorded results.
# Returns nothing.
sub _sanity_check {
    my($self) = @_;

    # The message is kept on one line so that it holds no embedded newline.
    $self->_whoa( $self->{Curr_Test} < 0,
        'Says here you ran a negative number of tests!' );

    $self->_whoa( $self->{Curr_Test} != @{ $self->{Test_Results} },
        'Somehow you got a different number of results than tests ran!' );

    return;
}
=item B<_whoa>
$self->_whoa($check, $description);
A sanity check, similar to C<assert()>. If the C<$check> is true, something
has gone horribly wrong. It will die with the given C<$description> and
a note to contact the author.
=cut
# Assertion helper: croak with "WHOA! $desc" plus a request to contact the
# author when $check is true.  Does nothing otherwise.
sub _whoa {
    my( $self, $check, $desc ) = @_;

    return unless $check;

    local $Level = $Level + 1;
    $self->croak(<<"WHOA");
WHOA!  $desc
This should never happen!  Please contact the author immediately!
WHOA

    return;
}
=item B<_my_exit>
_my_exit($exit_num);
Perl seems to have some trouble with exiting inside an C<END> block. 5.005_03
and 5.6.1 both seem to do odd things. Instead, this function edits C<$?>
directly. It should B<only> be called from inside an C<END> block. It
doesn't actually exit, that's your job.
=cut
# Store $exit_num in $? instead of calling exit.  Always returns 1.
sub _my_exit {
    my($exit_num) = @_;

    $? = $exit_num;    ## no critic (Variables::RequireLocalizedPunctuationVars)

    return 1;
}
=back
=end _private
=cut
# End-of-run bookkeeping: prints the closing diagnostics and stores the
# exit status through _my_exit() (which assigns $?).
#
# Returns early, without touching $?, when running in a process other than
# the one recorded in Original_Pid, when no plan was ever set, or after a
# bail out.
#
# When results were recorded: a non-zero original $? is kept; otherwise the
# status is the number of failed tests (capped at 254), 255 when the number
# of tests run differs from the plan and nothing failed, or 0.
# When no results were recorded: 0 for skip_all; otherwise a non-zero
# original $? is kept; otherwise "No tests run!" is reported with status 255.
sub _ending {
    my $self = shift;

    my $real_exit_code = $?;

    # Don't bother with an ending if this is a forked copy.  Only the parent
    # should do the ending.
    if( $self->{Original_Pid} != $$ ) {
        return;
    }

    # Ran tests but never declared a plan or hit done_testing
    if( !$self->{Have_Plan} and $self->{Curr_Test} ) {
        $self->diag("Tests were run but no plan was declared and done_testing() was not seen.");
    }

    # Exit if plan() was never called.  This is so "require Test::Simple"
    # doesn't puke.
    if( !$self->{Have_Plan} ) {
        return;
    }

    # Don't do an ending if we bailed out.
    if( $self->{Bailed_Out} ) {
        return;
    }

    # Figure out if we passed or failed and print helpful messages.
    my $test_results = $self->{Test_Results};
    if(@$test_results) {
        # The plan?  We have no plan.
        if( $self->{No_Plan} ) {
            $self->_output_plan($self->{Curr_Test}) unless $self->no_header;
            $self->{Expected_Tests} = $self->{Curr_Test};
        }

        # Auto-extended arrays and elements which aren't explicitly
        # filled in with a shared reference will puke under 5.8.0
        # ithreads.  So we have to fill them in by hand. :(
        my $empty_result = &share( {} );
        for my $idx ( 0 .. $self->{Expected_Tests} - 1 ) {
            $test_results->[$idx] = $empty_result
              unless defined $test_results->[$idx];
        }

        # Only the results of tests that were actually run are counted.
        my $num_failed = grep !$_->{'ok'},
          @{$test_results}[ 0 .. $self->{Curr_Test} - 1 ];

        my $num_extra = $self->{Curr_Test} - $self->{Expected_Tests};

        if( $num_extra != 0 ) {
            my $s = $self->{Expected_Tests} == 1 ? '' : 's';
            $self->diag(<<"FAIL");
Looks like you planned $self->{Expected_Tests} test$s but ran $self->{Curr_Test}.
FAIL
        }

        if($num_failed) {
            my $num_tests = $self->{Curr_Test};
            my $s = $num_failed == 1 ? '' : 's';

            my $qualifier = $num_extra == 0 ? '' : ' run';

            $self->diag(<<"FAIL");
Looks like you failed $num_failed test$s of $num_tests$qualifier.
FAIL
        }

        # A non-zero exit code from the test itself wins over the
        # computed one.
        if($real_exit_code) {
            $self->diag(<<"FAIL");
Looks like your test exited with $real_exit_code just after $self->{Curr_Test}.
FAIL

            _my_exit($real_exit_code) && return;
        }

        my $exit_code;
        if($num_failed) {
            $exit_code = $num_failed <= 254 ? $num_failed : 254;
        }
        elsif( $num_extra != 0 ) {
            $exit_code = 255;
        }
        else {
            $exit_code = 0;
        }

        _my_exit($exit_code) && return;
    }
    elsif( $self->{Skip_All} ) {
        _my_exit(0) && return;
    }
    elsif($real_exit_code) {
        $self->diag(<<"FAIL");
Looks like your test exited with $real_exit_code before it could output anything.
FAIL
        _my_exit($real_exit_code) && return;
    }
    else {
        $self->diag("No tests run!\n");
        _my_exit(255) && return;
    }

    # Every branch above returns (_my_exit() always yields 1), so control
    # should never get here.
    $self->_whoa( 1, "We fell off the end of _ending()" );
}
# At interpreter shutdown, run the ending of the $Test object unless it
# does not exist or has no_ending set.
END {
    if( defined $Test and !$Test->no_ending ) {
        $Test->_ending;
    }
}
=head1 EXIT CODES
If all your tests passed, Test::Builder will exit with zero (which is
normal). If anything failed it will exit with how many failed. If
you run less (or more) tests than you planned, the missing (or extras)
will be considered failures. If no tests were ever run Test::Builder
will throw a warning and exit with 255. If the test died, even after
having successfully completed all its tests, it will still be
considered a failure and will exit with 255.
So the exit codes are...
0 all tests successful
255 test died or all passed but wrong # of tests run
any other number how many failed (including missing or extras)
If you fail more than 254 tests, it will be reported as 254.
=head1 THREADS
In perl 5.8.1 and later, Test::Builder is thread-safe. The test
number is shared amongst all threads. This means if one thread sets
the test number using C<current_test()> they will all be affected.
While versions earlier than 5.8.1 had threads they contain too many
bugs to support.
Test::Builder is only thread-aware if threads.pm is loaded I<before>
Test::Builder.
=head1 MEMORY
An informative hash, accessible via C<< details() >>, is stored for each
test you perform. So memory usage will scale linearly with each test
run. Although this is not a problem for most test suites, it can
become an issue if you do large (hundred thousands to million)
combinatorics tests in the same run.
In such cases, you are advised to either split the test file into smaller
ones, or use a reverse approach, doing "normal" (code) compares and
triggering fail() should anything go unexpected.
Future versions of Test::Builder will have a way to turn history off.
=head1 EXAMPLES
CPAN can provide the best examples. Test::Simple, Test::More,
Test::Exception and Test::Differences all use Test::Builder.
=head1 SEE ALSO
Test::Simple, Test::More, Test::Harness
=head1 AUTHORS
Original code by chromatic, maintained by Michael G Schwern
E<lt>schwern@pobox.comE<gt>
=head1 COPYRIGHT
Copyright 2002-2008 by chromatic E<lt>chromatic@wgz.orgE<gt> and
Michael G Schwern E<lt>schwern@pobox.comE<gt>.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
See F<http://www.perl.com/perl/misc/Artistic.html>
=cut
1;
package Test::Harness;
require 5.00405;
use strict;
use constant IS_WIN32 => ( $^O =~ /^(MS)?Win32$/ );
use constant IS_VMS => ( $^O eq 'VMS' );
use TAP::Harness ();
use TAP::Parser::Aggregator ();
use TAP::Parser::Source::Perl ();
use TAP::Parser::Utils qw( split_shell );
use Config;
use Exporter;
# TODO: Emulate at least some of these
use vars qw(
$VERSION
@ISA @EXPORT @EXPORT_OK
$Verbose $Switches $Debug
$verbose $switches $debug
$Columns
$Color
$Directives
$Timer
$Strap
$has_time_hires
$IgnoreExit
);
# $ML $Last_ML_Print
# At compile time, try to load Time::HiRes and import its time().
# $has_time_hires records whether that succeeded ($@ empty after the eval).
BEGIN {
eval q{use Time::HiRes 'time'};
$has_time_hires = !$@;
}
=head1 NAME
Test::Harness - Run Perl standard test scripts with statistics
=head1 VERSION
Version 3.17
=cut
# Module version; also published to child processes via HARNESS_VERSION below.
$VERSION = '3.17';
# Backwards compatibility for exportable variable names.
# Each lower-case name is a glob alias of its capitalised counterpart.
*verbose = *Verbose;
*switches = *Switches;
*debug = *Debug;
# Advertise the harness through the environment while this module is loaded.
$ENV{HARNESS_ACTIVE} = 1;
$ENV{HARNESS_VERSION} = $VERSION;
# Remove the two environment entries again at interpreter shutdown.
END {
# For VMS.
delete $ENV{HARNESS_ACTIVE};
delete $ENV{HARNESS_VERSION};
}
# Exporter configuration: runtests() is exported by default, the rest on
# request.
# NOTE(review): the EXPORT section of the POD also lists $debug as
# exportable, but it is not in @EXPORT_OK -- confirm which is intended.
@ISA = ('Exporter');
@EXPORT = qw(&runtests);
@EXPORT_OK = qw(&execute_tests $verbose $switches);
# Defaults for the package settings, mostly taken from the environment.
$Verbose = $ENV{HARNESS_VERBOSE} || 0;
$Debug = $ENV{HARNESS_DEBUG} || 0;
$Switches = '-w';
$Columns = $ENV{HARNESS_COLUMNS} || $ENV{COLUMNS} || 80;
$Columns--; # Some shells have trouble with a full line of text.
$Timer = $ENV{HARNESS_TIMER} || 0;
$Color = $ENV{HARNESS_COLOR} || 0;
$IgnoreExit = $ENV{HARNESS_IGNORE_EXIT} || 0;
=head1 SYNOPSIS
use Test::Harness;
runtests(@test_files);
=head1 DESCRIPTION
Although, for historical reasons, the L<Test::Harness> distribution
takes its name from this module it now exists only to provide
L<TAP::Harness> with an interface that is somewhat backwards compatible
with L<Test::Harness> 2.xx. If you're writing new code consider using
L<TAP::Harness> directly instead.
Emulation is provided for C<runtests> and C<execute_tests> but the
pluggable 'Straps' interface that previous versions of L<Test::Harness>
supported is not reproduced here. Straps is now available as a stand
alone module: L<Test::Harness::Straps>.
See L<TAP::Parser>, L<TAP::Harness> for the main documentation for this
distribution.
=head1 FUNCTIONS
The following functions are available.
=head2 runtests( @test_files )
This runs all the given I<@test_files> and divines whether they passed
or failed based on their output to STDOUT (details above). It prints
out each individual test which failed along with a summary report and
how long it all took.
It returns true if everything was ok. Otherwise it will C<die()> with
one of the messages in the DIAGNOSTICS section.
=cut
# Pass the shebang of $test, as read by TAP::Parser::Source::Perl, to its
# get_taint() and return the result.
sub _has_taint {
    my($test) = @_;

    # Kept as a list so get_taint() sees exactly what shebang() returned.
    my @shebang = TAP::Parser::Source::Perl->shebang($test);

    return TAP::Parser::Source::Perl->get_taint(@shebang);
}
# Run @tests through $harness, collecting results in $aggregate, after
# arranging for the extra @INC directories to be passed on.
sub _aggregate {
    my $harness   = shift;
    my $aggregate = shift;
    my @tests     = @_;

    # HARNESS_OPTIONS is cleared for the duration of the run so that it
    # does not propagate to our children.
    local $ENV{HARNESS_OPTIONS};

    _apply_extra_INC($harness);

    return _aggregate_tests( $harness, $aggregate, @tests );
}
# Make sure the child sees all the extra directories in @INC: install a
# parser_args callback on $harness that appends a -I switch for each
# directory returned by _filtered_inc().
sub _apply_extra_INC {
    my($harness) = @_;

    my $add_inc_switches = sub {
        my($args) = @_;
        push @{ $args->{switches} }, map {"-I$_"} _filtered_inc();
    };

    return $harness->callback( parser_args => $add_inc_switches );
}
# Bracket $harness->aggregate_tests() with $aggregate->start() and
# $aggregate->stop(); the result of stop() is returned.
sub _aggregate_tests {
    my $harness   = shift;
    my $aggregate = shift;

    $aggregate->start();
    $harness->aggregate_tests( $aggregate, @_ );

    return $aggregate->stop();
}
# Run @tests, print the formatter's summary and return true when at least
# one subtest ran and all of them passed.  Dies with a "Failed ..." message
# when any test program reports problems.
sub runtests {
    my @tests = @_;

    # shield against -l
    local ( $\, $, );

    my $harness   = _new_harness();
    my $aggregate = TAP::Parser::Aggregator->new();

    _aggregate( $harness, $aggregate, @tests );

    $harness->formatter->summary($aggregate);

    my $total   = $aggregate->total;
    my $passed  = $aggregate->passed;
    my $failed  = $aggregate->failed;
    my @parsers = $aggregate->parsers;

    # Count the test programs that had any kind of problem.
    my $num_bad = grep { $_->has_problems } @parsers;

    if ($num_bad) {
        die sprintf(
            "Failed %d/%d test programs. %d/%d subtests failed.\n",
            $num_bad, scalar @parsers, $failed, $total
        );
    }

    return $total && $total == $passed;
}
# Render a list of numbers as a space-separated string in which runs of
# consecutive (or repeated) numbers are collapsed into "first-last".
sub _canon {
    my @sorted = sort { $a <=> $b } @_;

    my @ranges;
    my $start = 0;
    while ( $start < @sorted ) {

        # Extend the run for as long as the next number is at most one
        # above the current one.
        my $last = $start;
        $last++
          while $last + 1 < @sorted
          && $sorted[ $last + 1 ] <= $sorted[$last] + 1;

        if ( $last == $start ) {
            push @ranges, $sorted[$start];
        }
        else {
            push @ranges, join( '-', $sorted[$start], $sorted[$last] );
        }

        $start = $last + 1;
    }

    return join( ' ', @ranges );
}
# Build a TAP::Harness from the package settings ($Switches, $Verbose,
# $Timer, ...), the HARNESS_PERL_SWITCHES and HARNESS_OPTIONS environment
# variables, and the optional { out => $fh } argument.
# Dies on an unrecognised HARNESS_OPTIONS item.
sub _new_harness {
    my($sub_args) = @_;
    $sub_args ||= {};

    # Sort the configured switches into -I library paths and the rest.
    my @lib;
    my @switches;
    my @pending = split_shell( $Switches, $ENV{HARNESS_PERL_SWITCHES} );
    while ( my $switch = shift @pending ) {
        my ($dir) = $switch =~ /^ -I (.*) $ /x;

        if ( !defined $dir ) {
            push @switches, $switch;
        }
        elsif ( length $dir ) {
            push @lib, $dir;
        }
        else {
            # A bare -I: the directory is the next word.
            push @lib, shift @pending;
        }
    }

    # Do things the old way on VMS...
    push @lib, _filtered_inc() if IS_VMS;

    # A true $Verbose without any digit in it counts as 1.  This helps core.
    my $verbosity = 0;
    if ($Verbose) {
        $verbosity = $Verbose !~ /\d/ ? 1 : $Verbose;
    }

    my %harness_args = (
        timer       => $Timer,
        directives  => $Directives,
        lib         => \@lib,
        switches    => \@switches,
        color       => $Color,
        verbosity   => $verbosity,
        ignore_exit => $IgnoreExit,
    );

    $harness_args{stdout} = $sub_args->{out}
      if exists $sub_args->{out};

    # HARNESS_OPTIONS is a colon-separated list: j<n> sets the number of
    # jobs (9 when <n> is missing or 0), c turns color on.
    my $env_opt = $ENV{HARNESS_OPTIONS};
    if ( defined $env_opt ) {
        for my $opt ( split /:/, $env_opt ) {
            if ( $opt =~ /^j(\d*)$/ ) {
                $harness_args{jobs} = $1 || 9;
            }
            elsif ( $opt eq 'c' ) {
                $harness_args{color} = 1;
            }
            else {
                die "Unknown HARNESS_OPTIONS item: $opt\n";
            }
        }
    }

    return TAP::Harness->new( \%harness_args );
}
# Return the entries of @INC that are not accounted for by walking the
# stock list from _default_inc() in order, so additions and reordered stock
# directories are kept.  Duplicates and references in @INC are dropped.
sub _filtered_inc {
    my @candidates = grep { !ref } @INC;    #28567

    if (IS_VMS) {
        # VMS has a 255-byte limit on the length of %ENV entries, so
        # toss the ones that involve perl_root, the install location
        @candidates = grep { !/perl_root/i } @candidates;
    }
    elsif (IS_WIN32) {
        # Lose any trailing backslashes in the Win32 paths
        for my $dir (@candidates) {
            $dir =~ s/[\\\/]+$//;
        }
    }

    my @stock = _default_inc();

    my @extra;
    my %seen;

    DIR:
    for my $dir (@candidates) {
        next DIR if $seen{$dir}++;

        # A directory sitting where the stock list expects it is consumed
        # from the stock list; anything else is reported.
        my $expected = $stock[0] || '';
        if ( $dir eq $expected ) {
            shift @stock;
        }
        else {
            push @extra, $dir;
        }

        # Skip stock directories that have already been encountered.
        shift @stock while @stock and $seen{ $stock[0] };
    }

    return @extra;
}
{

    # The stock @INC is cached here to avoid repeatedly shelling out to Perl.
    my @stock_inc;

    # Return the @INC printed by HARNESS_PERL (or the running perl, $^X)
    # when it is started with PERL5LIB and PERLLIB localized away.
    sub _default_inc {
        unless (@stock_inc) {
            local $ENV{PERL5LIB};
            local $ENV{PERLLIB};

            my $perl = $ENV{HARNESS_PERL} || $^X;

            # Avoid using -l for the benefit of Perl 6
            chomp( @stock_inc = `$perl -e "print join qq[\\n], \@INC, q[]"` );
        }

        return @stock_inc;
    }
}
# Return 1 when the given numbers are strictly increasing, nothing
# otherwise.  Checking stops at the first false value in the list.
sub _check_sequence {
    my $prev;

    for my $next (@_) {
        last unless $next;
        return if defined $prev && $next <= $prev;
        $prev = $next;
    }

    return 1;
}
# Run the test files given as tests => \@files and return three hash
# references: overall totals, details of the failed test scripts, and
# details of the scripts in which TODO tests unexpectedly passed.
# All named arguments are handed to _new_harness(), which looks at 'out'.
sub execute_tests {
my %args = @_;
my $harness = _new_harness( \%args );
my $aggregate = TAP::Parser::Aggregator->new();
# Running totals; 'bench' is filled in from the aggregator after the run.
my %tot = (
bonus => 0,
max => 0,
ok => 0,
bad => 0,
good => 0,
files => 0,
tests => 0,
sub_skipped => 0,
todo => 0,
skipped => 0,
bench => undef,
);
# Install a callback so we get to see any plans the
# harness executes.  A plan carrying the SKIP directive counts the
# file as skipped.
$harness->callback(
made_parser => sub {
my $parser = shift;
$parser->callback(
plan => sub {
my $plan = shift;
if ( $plan->directive eq 'SKIP' ) {
$tot{skipped}++;
}
}
);
}
);
_aggregate( $harness, $aggregate, @{ $args{tests} } );
$tot{bench} = $aggregate->elapsed;
my @tests = $aggregate->descriptions;
# TODO: Work out the circumstances under which the files
# and tests totals can differ.
$tot{files} = $tot{tests} = scalar @tests;
my %failedtests = ();
my %todo_passed = ();
# Fold the result of every test script into the totals and, where
# applicable, into the failure and todo-passed reports.
for my $test (@tests) {
my ($parser) = $aggregate->parsers($test);
my @failed = $parser->failed;
my $wstat = $parser->wait;
my $estat = $parser->exit;
my $planned = $parser->tests_planned;
my @errors = $parser->parse_errors;
# NOTE(review): passed/actual_passed are read in scalar context here and
# compared numerically below -- presumably counts; confirm against TAP::Parser.
my $passed = $parser->passed;
my $actual_passed = $parser->actual_passed;
my $ok_seq = _check_sequence( $parser->actual_passed );
# Duplicate exit, wait status semantics of old version
$estat ||= '' unless $wstat;
$wstat ||= '';
$tot{max} += ( $planned || 0 );
$tot{bonus} += $parser->todo_passed;
$tot{ok} += $passed > $actual_passed ? $passed : $actual_passed;
$tot{sub_skipped} += $parser->skipped;
$tot{todo} += $parser->todo;
# A script is bad if any test failed, it exited non-zero or its output
# had parse errors.
if ( @failed || $estat || @errors ) {
$tot{bad}++;
# '??' stands in for values that cannot be trusted: no plan was seen,
# or the passed test numbers were not strictly increasing.
my $huh_planned = $planned ? undef : '??';
my $huh_errors = $ok_seq ? undef : '??';
$failedtests{$test} = {
'canon' => $huh_planned
|| $huh_errors
|| _canon(@failed)
|| '??',
'estat' => $estat,
'failed' => $huh_planned
|| $huh_errors
|| scalar @failed,
'max' => $huh_planned || $planned,
'name' => $test,
'wstat' => $wstat
};
}
else {
$tot{good}++;
}
# Report TODO tests that passed, in the same layout as a failure entry.
my @todo = $parser->todo_passed;
if (@todo) {
$todo_passed{$test} = {
'canon' => _canon(@todo),
'estat' => $estat,
'failed' => scalar @todo,
'max' => scalar $parser->todo,
'name' => $test,
'wstat' => $wstat
};
}
}
return ( \%tot, \%failedtests, \%todo_passed );
}
=head2 execute_tests( tests => \@test_files, out => \*FH )
Runs all the given C<@test_files> (just like C<runtests()>) but
doesn't generate the final report. During testing, progress
information will be written to the currently selected output
filehandle (usually C<STDOUT>), or to the filehandle given by the
C<out> parameter. The I<out> is optional.
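Returns a list of three values, C<$total>, C<$failed> and C<$todo_passed>,
describing the results. C<$todo_passed> is a hash ref laid out like
C<$failed> (described below), covering the test scripts in which TODO tests
unexpectedly passed. C<$total> is a hash ref summary of all the tests run.
Its keys and values are these: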
bonus Number of individual todo tests unexpectedly passed
max Number of individual tests ran
ok Number of individual tests passed
sub_skipped Number of individual tests skipped
todo Number of individual todo tests
files Number of test files ran
good Number of test files passed
bad Number of test files failed
tests Number of test files originally given
skipped Number of test files skipped
If C<< $total->{bad} == 0 >> and C<< $total->{max} > 0 >>, you've
got a successful test.
C<$failed> is a hash ref of all the test scripts that failed. Each key
is the name of a test script, each value is another hash representing
how that script failed. Its keys are these:
name Name of the test which failed
estat Script's exit value
wstat Script's wait status
max Number of individual tests
failed Number which failed
canon List of tests which failed (as string).
C<$failed> should be empty if everything passed.
=cut
1;
__END__
=head1 EXPORT
C<&runtests> is exported by C<Test::Harness> by default.
C<&execute_tests>, C<$verbose>, C<$switches> and C<$debug> are
exported upon request.
=head1 ENVIRONMENT VARIABLES THAT TAP::HARNESS::COMPATIBLE SETS
C<Test::Harness> sets these before executing the individual tests.
=over 4
=item C<HARNESS_ACTIVE>
This is set to a true value. It allows the tests to determine if they
are being executed through the harness or by any other means.
=item C<HARNESS_VERSION>
This is the version of C<Test::Harness>.
=back
=head1 ENVIRONMENT VARIABLES THAT AFFECT TEST::HARNESS
=over 4
=item C<HARNESS_TIMER>
Setting this to true will make the harness display the number of
milliseconds each test took. You can also use F<prove>'s C<--timer>
switch.
=item C<HARNESS_VERBOSE>
If true, C<Test::Harness> will output the verbose results of running
its tests. Setting C<$Test::Harness::verbose> will override this,
or you can use the C<-v> switch in the F<prove> utility.
=item C<HARNESS_OPTIONS>
Provide additional options to the harness. Currently supported options are:
=over
=item C<< j<n> >>
Run <n> (default 9) parallel jobs.
=item C<< c >>
Use colored output (sets the harness C<color> option).
=back
Multiple options may be separated by colons:
HARNESS_OPTIONS=j9:c make test
=back
=head1 Taint Mode
Normally when a Perl program is run in taint mode the contents of the
C<PERL5LIB> environment variable do not appear in C<@INC>.
Because C<PERL5LIB> is often used during testing to add build
directories to C<@INC> C<Test::Harness> (actually
L<TAP::Parser::Source::Perl>) passes the names of any directories found
in C<PERL5LIB> as -I switches. The net effect of this is that
C<PERL5LIB> is honoured even in taint mode.
=head1 SEE ALSO
L<TAP::Harness>
=head1 BUGS
Please report any bugs or feature requests to
C<bug-test-harness at rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Test-Harness>. I will be
notified, and then you'll automatically be notified of progress on your bug
as I make changes.
=head1 AUTHORS
Andy Armstrong C<< <andy@hexten.net> >>
L<Test::Harness> 2.64 (maintained by Andy Lester and on which this
module is based) has this attribution:
Either Tim Bunce or Andreas Koenig, we don't know. What we know for
sure is, that it was inspired by Larry Wall's F<TEST> script that came
with perl distributions for ages. Numerous anonymous contributors
exist. Andreas Koenig held the torch for many years, and then
Michael G Schwern.
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2007-2008, Andy Armstrong C<< <andy@hexten.net> >>.
All rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.
package Test::More;
use 5.006;
use strict;
use warnings;
#---- perlcritic exemptions. ----#
# We use a lot of subroutine prototypes
## no critic (Subroutines::ProhibitSubroutinePrototypes)
# Warn with the file and line of the caller's caller appended.
#
# Can't use Carp because it might cause use_ok() to accidentally succeed
# even though the module being used forgot to use Carp.  Yes, this
# actually happened.
sub _carp {
    my( undef, $file, $line ) = caller(1);

    return warn @_, " at $file line $line\n";
}
our $VERSION = '0.92';

# Evaluate the version string so that $VERSION holds the resulting number.
# The perlcritic annotation must stay on a single line: a closing
# parenthesis left on a line of its own is a syntax error.
$VERSION = eval $VERSION;    ## no critic (BuiltinFunctions::ProhibitStringyEval)
use Test::Builder::Module;
# Test::More inherits from Test::Builder::Module.
our @ISA = qw(Test::Builder::Module);
# Names exported by default: the testing functions plus the $TODO variable.
our @EXPORT = qw(ok use_ok require_ok
is isnt like unlike is_deeply
cmp_ok
skip todo todo_skip
pass fail
eq_array eq_hash eq_set
$TODO
plan
done_testing
can_ok isa_ok new_ok
diag note explain
BAIL_OUT
);
=head1 NAME
Test::More - yet another framework for writing test scripts
=head1 SYNOPSIS
use Test::More tests => 23;
# or
use Test::More skip_all => $reason;
# or
use Test::More; # see done_testing()
BEGIN { use_ok( 'Some::Module' ); }
require_ok( 'Some::Module' );
# Various ways to say "ok"
ok($got eq $expected, $test_name);
is ($got, $expected, $test_name);
isnt($got, $expected, $test_name);
# Rather than print STDERR "# here's what went wrong\n"
diag("here's what went wrong");
like ($got, qr/expected/, $test_name);
unlike($got, qr/expected/, $test_name);
cmp_ok($got, '==', $expected, $test_name);
is_deeply($got_complex_structure, $expected_complex_structure, $test_name);
SKIP: {
skip $why, $how_many unless $have_some_feature;
ok( foo(), $test_name );
is( foo(42), 23, $test_name );
};
TODO: {
local $TODO = $why;
ok( foo(), $test_name );
is( foo(42), 23, $test_name );
};
can_ok($module, @methods);
isa_ok($object, $class);
pass($test_name);
fail($test_name);
BAIL_OUT($why);
# UNIMPLEMENTED!!!
my @status = Test::More::status;
=head1 DESCRIPTION
B<STOP!> If you're just getting started writing tests, have a look at
L<Test::Simple> first. This is a drop in replacement for Test::Simple
which you can switch to once you get the hang of basic testing.
The purpose of this module is to provide a wide range of testing
utilities. Various ways to say "ok" with better diagnostics,
facilities to skip tests, test future features and compare complicated
data structures. While you can do almost anything with a simple
C<ok()> function, it doesn't provide good diagnostic output.
=head2 I love it when a plan comes together
Before anything else, you need a testing plan. This basically declares
how many tests your script is going to run to protect against premature
failure.
The preferred way to do this is to declare a plan when you C<use Test::More>.
use Test::More tests => 23;
There are cases when you will not know beforehand how many tests your
script is going to run. In this case, you can declare your tests at
the end.
use Test::More;
... run your tests ...
done_testing( $number_of_tests_run );
Sometimes you really don't know how many tests were run, or it's too
difficult to calculate. In which case you can leave off
$number_of_tests_run.
In some cases, you'll want to completely skip an entire testing script.
use Test::More skip_all => $skip_reason;
Your script will declare a skip with the reason why you skipped and
exit immediately with a zero (success). See L<Test::Harness> for
details.
If you want to control what functions Test::More will export, you
have to use the 'import' option. For example, to import everything
but 'fail', you'd do:
use Test::More tests => 23, import => ['!fail'];
Alternatively, you can use the plan() function. Useful for when you
have to calculate the number of tests.
use Test::More;
plan tests => keys %Stuff * 3;
or for deciding between running the tests at all:
use Test::More;
if( $^O eq 'MacOS' ) {
plan skip_all => 'Test irrelevant on MacOS';
}
else {
plan tests => 42;
}
=cut
# Declare the test plan.  Every argument is handed, untouched, to the
# plan() method of the Test::Builder object underlying Test::More, and
# whatever that method returns is returned to the caller.
sub plan {
    return Test::More->builder->plan(@_);
}
# This implements "use Test::More 'no_diag'" but the behavior is
# deprecated.
#
# import_extra( $class, \@import_list )
#
# Walks the import list.  Every defined item equal to 'no_diag' switches
# diagnostics off on the class's builder and is dropped; all other items
# (including undef) are kept in their original order.  The list is
# rewritten in place and nothing is returned.
sub import_extra {
    my( $class, $list ) = @_;

    my @kept;
    for my $item (@$list) {
        if( defined $item and $item eq 'no_diag' ) {
            $class->builder->no_diag(1);
            next;
        }
        push @kept, $item;
    }

    @$list = @kept;

    return;
}
=over 4
=item B<done_testing>
done_testing();
done_testing($number_of_tests);
If you don't know how many tests you're going to run, you can issue
the plan when you're done running tests.
$number_of_tests is the same as plan(), it's the number of tests you
expected to run. You can omit this, in which case the number of tests
you ran doesn't matter, just the fact that your tests ran to
conclusion.
This is safer than and replaces the "no_plan" plan.
=back
=cut
# Signal that testing is finished.  The optional expected test count is
# forwarded as-is to the builder's done_testing(), whose result is returned.
sub done_testing {
    return Test::More->builder->done_testing(@_);
}
=head2 Test names
By convention, each test is assigned a number in order. This is
largely done automatically for you. However, it's often very useful to
assign a name to each test. Which would you rather see:
ok 4
not ok 5
ok 6
or
ok 4 - basic multi-variable
not ok 5 - simple exponential
ok 6 - force == mass * acceleration
The latter gives you some idea of what failed. It also makes it easier
to find the test in your script, simply search for "simple
exponential".
All test functions take a name argument. It's optional, but highly
suggested that you use it.
=head2 I'm ok, you're not ok.
The basic purpose of this module is to print out either "ok #" or "not
ok #" depending on if a given test succeeded or failed. Everything
else is just gravy.
All of the following print "ok" or "not ok" depending on if the test
succeeded or failed. They all also return true or false,
respectively.
=over 4
=item B<ok>
ok($got eq $expected, $test_name);
This simply evaluates any expression (C<$got eq $expected> is just a
simple example) and uses that to determine if the test succeeded or
failed. A true expression passes, a false one fails. Very simple.
For example:
ok( $exp{9} == 81, 'simple exponential' );
ok( Film->can('db_Main'), 'set_db()' );
ok( $p->tests == 4, 'saw tests' );
ok( !grep !defined $_, @items, 'items populated' );
(Mnemonic: "This is ok.")
$test_name is a very short description of the test that will be printed
out. It makes it very easy to find a test in your script when it fails
and gives others an idea of your intentions. $test_name is optional,
but we B<very> strongly encourage its use.
Should an ok() fail, it will produce some diagnostics:
not ok 18 - sufficient mucus
# Failed test 'sufficient mucus'
# in foo.t at line 42.
This is the same as Test::Simple's ok() routine.
=cut
# ok( $test, $name )
#
# Records a pass when $test is true and a failure otherwise, by delegating
# to the builder's ok().  $name is optional.  Returns the builder's result.
sub ok ($;$) {
    my $test = shift;
    my $name = shift;

    return Test::More->builder->ok( $test, $name );
}
=item B<is>
=item B<isnt>
is ( $got, $expected, $test_name );
isnt( $got, $expected, $test_name );
Similar to ok(), is() and isnt() compare their two arguments
with C<eq> and C<ne> respectively and use the result of that to
determine if the test succeeded or failed. So these:
# Is the ultimate answer 42?
is( ultimate_answer(), 42, "Meaning of Life" );
# $foo isn't empty
isnt( $foo, '', "Got some foo" );
are similar to these:
ok( ultimate_answer() eq 42, "Meaning of Life" );
ok( $foo ne '', "Got some foo" );
(Mnemonic: "This is that." "This isn't that.")
So why use these? They produce better diagnostics on failure. ok()
cannot know what you are testing for (beyond the name), but is() and
isnt() know what the test was and why it failed. For example this
test:
my $foo = 'waffle'; my $bar = 'yarblokos';
is( $foo, $bar, 'Is foo the same as bar?' );
Will produce something like this:
not ok 17 - Is foo the same as bar?
# Failed test 'Is foo the same as bar?'
# in foo.t at line 139.
# got: 'waffle'
# expected: 'yarblokos'
So you can figure out what went wrong without rerunning the test.
You are encouraged to use is() and isnt() over ok() where possible,
however do not be tempted to use them to find out if something is
true or false!
# XXX BAD!
is( exists $brooklyn{tree}, 1, 'A tree grows in Brooklyn' );
This does not check if C<exists $brooklyn{tree}> is true, it checks if
it returns 1. Very different. Similar caveats exist for false and 0.
In these cases, use ok().
ok( exists $brooklyn{tree}, 'A tree grows in Brooklyn' );
A simple call to isnt() usually does not provide a strong test but there
are cases when you cannot say much more about a value than that it is
different from some other value:
new_ok $obj, "Foo";
my $clone = $obj->clone;
isa_ok $obj, "Foo", "Foo->clone";
isnt $obj, $clone, "clone() produces a different object";
For those grammatical pedants out there, there's an C<isn't()>
function which is an alias of isnt().
=cut
# is( $got, $expected, $test_name )
#
# String-equality test; all arguments go straight to the builder's is_eq().
sub is ($$;$) {
    return Test::More->builder->is_eq(@_);
}
# isnt( $got, $expected, $test_name )
#
# String-inequality test; all arguments go straight to the builder's
# isnt_eq().
sub isnt ($$;$) {
    return Test::More->builder->isnt_eq(@_);
}

# Alias for the grammatical pedants: isn't() is the same sub as isnt().
*isn't = \&isnt;
=item B<like>
like( $got, qr/expected/, $test_name );
Similar to ok(), like() matches $got against the regex C<qr/expected/>.
So this:
like($got, qr/expected/, 'this is like that');
is similar to:
ok( $got =~ /expected/, 'this is like that');
(Mnemonic "This is like that".)
The second argument is a regular expression. It may be given as a
regex reference (i.e. C<qr//>) or (for better compatibility with older
perls) as a string that looks like a regex (alternative delimiters are
currently not supported):
like( $got, '/expected/', 'this is like that' );
Regex options may be placed on the end (C<'/expected/i'>).
Its advantages over ok() are similar to that of is() and isnt(). Better
diagnostics on failure.
=cut
# like( $got, $regex, $test_name )
#
# Pattern-match test; all arguments go straight to the builder's like().
sub like ($$;$) {
    return Test::More->builder->like(@_);
}
=item B<unlike>
unlike( $got, qr/expected/, $test_name );
Works exactly as like(), only it checks if $got B<does not> match the
given pattern.
=cut
# unlike( $got, $regex, $test_name )
#
# Negated pattern-match test; all arguments go straight to the builder's
# unlike().
sub unlike ($$;$) {
    return Test::More->builder->unlike(@_);
}
=item B<cmp_ok>
cmp_ok( $got, $op, $expected, $test_name );
Halfway between ok() and is() lies cmp_ok(). This allows you to
compare two arguments using any binary perl operator.
# ok( $got eq $expected );
cmp_ok( $got, 'eq', $expected, 'this eq that' );
# ok( $got == $expected );
cmp_ok( $got, '==', $expected, 'this == that' );
# ok( $got && $expected );
cmp_ok( $got, '&&', $expected, 'this && that' );
...etc...
Its advantage over ok() is when the test fails you'll know what $got
and $expected were:
not ok 1
# Failed test in foo.t at line 12.
# '23'
# &&
# undef
It's also useful in those cases where you are comparing numbers and
is()'s use of C<eq> will interfere:
cmp_ok( $big_hairy_number, '==', $another_big_hairy_number );
It's especially useful when comparing greater-than or smaller-than
relation between values:
cmp_ok( $some_value, '<=', $upper_limit );
=cut
# cmp_ok( $got, $op, $expected, $test_name )
#
# Compares two values with an arbitrary binary operator; all arguments go
# straight to the builder's cmp_ok().
sub cmp_ok($$$;$) {
    return Test::More->builder->cmp_ok(@_);
}
=item B<can_ok>
can_ok($module, @methods);
can_ok($object, @methods);
Checks to make sure the $module or $object can do these @methods
(works with functions, too).
can_ok('Foo', qw(this that whatever));
is almost exactly like saying:
ok( Foo->can('this') &&
Foo->can('that') &&
Foo->can('whatever')
);
only without all the typing and with a better interface. Handy for
quickly testing an interface.
No matter how many @methods you check, a single can_ok() call counts
as one test. If you desire otherwise, use:
foreach my $meth (@methods) {
can_ok('Foo', $meth);
}
=cut
# can_ok( $module_or_object, @methods )
#
# Emits exactly one test.  It fails (with a diagnostic) when the invocant is
# empty, when no methods are listed, or when ->can() is false for any of the
# listed methods; each missing method gets its own diagnostic line.
# Returns the result of the builder's ok().
sub can_ok ($@) {
    my $proto   = shift;
    my @methods = @_;

    my $tb    = Test::More->builder;
    my $class = ref $proto || $proto;

    if( !$class ) {
        my $ok = $tb->ok( 0, "->can(...)" );
        $tb->diag(' can_ok() called with empty class or reference');
        return $ok;
    }

    if( !@methods ) {
        my $ok = $tb->ok( 0, "$class->can(...)" );
        $tb->diag(' can_ok() called with no methods');
        return $ok;
    }

    # ->can() is invoked through _try() so a dying can() counts as "cannot".
    my @nok = grep {
        my $method = $_;
        !$tb->_try( sub { $proto->can($method) } );
    } @methods;

    # A single method is spelled out in the test name, several are elided.
    my $name = "$class->can(...)";
    $name = "$class->can('$methods[0]')" if @methods == 1;

    my $ok = $tb->ok( !@nok, $name );
    $tb->diag( map " $class->can('$_') failed\n", @nok );

    return $ok;
}
=item B<isa_ok>
isa_ok($object, $class, $object_name);
isa_ok($subclass, $class, $object_name);
isa_ok($ref, $type, $ref_name);
Checks to see if the given C<< $object->isa($class) >>. Also checks to make
sure the object was defined in the first place. Handy for this sort
of thing:
my $obj = Some::Module->new;
isa_ok( $obj, 'Some::Module' );
where you'd otherwise have to write
my $obj = Some::Module->new;
ok( defined $obj && $obj->isa('Some::Module') );
to safeguard against your test script blowing up.
You can also test a class, to make sure that it has the right ancestor:
isa_ok( 'Vole', 'Rodent' );
It works on references, too:
isa_ok( $array_ref, 'ARRAY' );
The diagnostics of this test normally just refer to 'the object'. If
you'd like them to be more specific, you can supply an $object_name
(for example 'Test customer').
=cut
# isa_ok( $object, $class, $obj_name )
#
# Emits one test named "$obj_name isa $class".  $object may be an object, a
# class name or an unblessed reference.  The test fails with a diagnostic
# when $object is undefined, is not a class or reference at all, or is not
# a $class.  When $obj_name is omitted a default is chosen per case ('The
# thing', 'The reference', 'The object' or 'The class'), so the name and
# the diagnostic never interpolate an undefined value.
#
# Dies if calling ->isa() fails with an error that is not recognised.
# Returns the result of the builder's ok().
sub isa_ok ($$;$) {
    my( $object, $class, $obj_name ) = @_;
    my $tb = Test::More->builder;

    my $diag;

    if( !defined $object ) {
        $obj_name = 'The thing' unless defined $obj_name;
        $diag = "$obj_name isn't defined";
    }
    else {
        my $whatami = ref $object ? 'object' : 'class';

        # We can't use UNIVERSAL::isa because we want to honor isa() overrides
        my( $rslt, $error ) = $tb->_try( sub { $object->isa($class) } );
        if($error) {
            if( $error =~ /^Can't call method "isa" on unblessed reference/ ) {
                # It's an unblessed reference
                $obj_name = 'The reference' unless defined $obj_name;
                if( !UNIVERSAL::isa( $object, $class ) ) {
                    my $ref = ref $object;
                    $diag = "$obj_name isn't a '$class' it's a '$ref'";
                }
            }
            elsif( $error =~ /Can't call method "isa" without a package/ ) {
                # It's something that can't even be a class.  Default the
                # name here too, otherwise the diagnostic and the test name
                # would be built from an undefined value.
                $obj_name = 'The thing' unless defined $obj_name;
                $diag = "$obj_name isn't a class or reference";
            }
            else {
                die <<WHOA;
WHOA! I tried to call ->isa on your $whatami and got some weird error.
Here's the error.
$error
WHOA
            }
        }
        else {
            $obj_name = "The $whatami" unless defined $obj_name;
            if( !$rslt ) {
                my $ref = ref $object;
                $diag = "$obj_name isn't a '$class' it's a '$ref'";
            }
        }
    }

    my $name = "$obj_name isa $class";
    my $ok;
    if($diag) {
        $ok = $tb->ok( 0, $name );
        $tb->diag(" $diag\n");
    }
    else {
        $ok = $tb->ok( 1, $name );
    }

    return $ok;
}
=item B<new_ok>
my $obj = new_ok( $class );
my $obj = new_ok( $class => \@args );
my $obj = new_ok( $class => \@args, $object_name );
A convenience function which combines creating an object and calling
isa_ok() on that object.
It is basically equivalent to:
my $obj = $class->new(@args);
isa_ok $obj, $class, $object_name;
If @args is not given, an empty list will be used.
This function only works on new() and it assumes new() will return
just a single object which isa C<$class>.
=cut
# new_ok( $class, \@args, $object_name )
#
# Calls $class->new(@args) and then checks the result with isa_ok().  If
# new() dies, a failing test named "new() died" is recorded together with
# the error.  @args defaults to an empty list and $object_name to
# "The object".  Croaks when called without any argument.  Returns
# whatever new() produced (undef if it died).
sub new_ok {
    my $tb = Test::More->builder;
    if( !@_ ) {
        $tb->croak("new_ok() must be given at least a class");
    }

    my $class       = shift;
    my $args        = shift;
    my $object_name = shift;

    $args        = []           unless $args;
    $object_name = "The object" unless defined $object_name;

    my $obj;
    my( $success, $error ) = $tb->_try( sub { $obj = $class->new(@$args); 1 } );

    if( !$success ) {
        $tb->ok( 0, "new() died" );
        $tb->diag(" Error was: $error");
    }
    else {
        # Raise the level so failures are reported at new_ok()'s caller.
        local $Test::Builder::Level = $Test::Builder::Level + 1;
        isa_ok( $obj, $class, $object_name );
    }

    return $obj;
}
=item B<pass>
=item B<fail>
pass($test_name);
fail($test_name);
Sometimes you just want to say that the tests have passed. Usually
the case is you've got some complicated condition that is difficult to
wedge into an ok(). In this case, you can simply use pass() (to
declare the test ok) or fail (for not ok). They are synonyms for
ok(1) and ok(0).
Use these very, very, very sparingly.
=cut
# pass( $test_name )
#
# Unconditionally records a passing test; the optional name is forwarded
# to the builder's ok().
sub pass (;$) {
    my @name = @_;
    return Test::More->builder->ok( 1, @name );
}
# fail( $test_name )
#
# Unconditionally records a failing test; the optional name is forwarded
# to the builder's ok().
sub fail (;$) {
    my @name = @_;
    return Test::More->builder->ok( 0, @name );
}
=back
=head2 Module tests
You usually want to test if the module you're testing loads ok, rather
than just vomiting if its load fails. For such purposes we have
C<use_ok> and C<require_ok>.
=over 4
=item B<use_ok>
BEGIN { use_ok($module); }
BEGIN { use_ok($module, @imports); }
These simply use the given $module and test to make sure the load
happened ok. It's recommended that you run use_ok() inside a BEGIN
block so its functions are exported at compile-time and prototypes are
properly honored.
If @imports are given, they are passed through to the use. So this:
BEGIN { use_ok('Some::Module', qw(foo bar)) }
is like doing this:
use Some::Module qw(foo bar);
Version numbers can be checked like so:
# Just like "use Some::Module 1.02"
BEGIN { use_ok('Some::Module', 1.02) }
Don't try to do this:
BEGIN {
use_ok('Some::Module');
...some code that depends on the use...
...happening at compile time...
}
because the notion of "compile-time" is relative. Instead, you want:
BEGIN { use_ok('Some::Module') }
BEGIN { ...some code that depends on the use... }
=cut
# use_ok( $module, @imports )
#
# Emits one test named "use $module;" that passes when `use $module` works
# inside the caller's package.  A single import that looks like a plain
# number is treated as a version check and written into the generated code
# as a bare number; any other import list is passed to the `use` by
# reference through _eval()'s @args.
#
# On failure a diagnostic with the error is printed.  A "BEGIN failed"
# line in that error is rewritten to point at the caller's file and line
# rather than at the generated eval.  Returns the result of the builder's
# ok().
sub use_ok ($;@) {
    my( $module, @imports ) = @_;
    my $tb = Test::More->builder;

    my( $pack, $filename, $line ) = caller;

    my $code;
    if( @imports == 1 and $imports[0] =~ /^\d+(?:\.\d+)?$/ ) {
        # probably a version check. Perl needs to see the bare number
        # for it to work with non-Exporter based modules.
        $code = <<USE;
package $pack;
use $module $imports[0];
1;
USE
    }
    else {
        $code = <<USE;
package $pack;
use $module \@{\$args[0]};
1;
USE
    }

    my( $eval_result, $eval_error ) = _eval( $code, \@imports );
    my $ok = $tb->ok( $eval_result, "use $module;" );

    unless($ok) {
        chomp $eval_error;

        # The message lives in $eval_error; _eval() localizes $@, so
        # editing $@ here would leave the diagnostic untouched and
        # clobber the caller's own $@ instead.
        $eval_error =~ s{^BEGIN failed--compilation aborted at .*$}
                        {BEGIN failed--compilation aborted at $filename line $line.}m;
        $tb->diag(<<DIAGNOSTIC);
Tried to use '$module'.
Error: $eval_error
DIAGNOSTIC
    }

    return $ok;
}
# _eval( $code, @args )
#
# String-evals $code with $@, $! and $SIG{__DIE__} localized, so the
# caller's copies of those globals are not disturbed by the eval.  The
# evaluated code can see the lexical @args of this sub; use_ok() relies on
# that to hand over its import list.
#
# If the evaluated code installed a true $SIG{__DIE__} handler, that
# handler is re-installed once the localized scope has ended.
#
# Returns the list ( value of the eval, error text captured from $@ ).
sub _eval {
    my( $code, @args ) = @_;

    # Work around oddities surrounding resetting of $@ by immediately
    # storing it.
    my( $sigdie, $eval_result, $eval_error );
    {
        local( $@, $!, $SIG{__DIE__} );    # isolate eval
        $eval_result = eval $code;    ## no critic (BuiltinFunctions::ProhibitStringyEval)
        $eval_error  = $@;
        $sigdie      = $SIG{__DIE__} || undef;
    }

    # make sure that $code got a chance to set $SIG{__DIE__}
    $SIG{__DIE__} = $sigdie if defined $sigdie;

    return( $eval_result, $eval_error );
}
=item B<require_ok>
require_ok($module);
require_ok($file);
Like use_ok(), except it requires the $module or $file.
=cut
# require_ok( $module_or_file )
#
# Emits one test named "require ...;" that passes when the module or file
# can be required from inside the caller's package.  On failure the error
# is printed as a diagnostic.  Returns the result of the builder's ok().
sub require_ok ($) {
    my $module = $_[0];
    my $tb     = Test::More->builder;
    my $pack   = caller;

    # Try to determine if we've been given a module name or file.
    # Module names must be barewords, files not.
    if( !_is_module_name($module) ) {
        $module = qq['$module'];
    }

    my $code = "package $pack;\nrequire $module;\n1;\n";

    my( $eval_result, $eval_error ) = _eval($code);
    my $ok = $tb->ok( $eval_result, "require $module;" );

    if( !$ok ) {
        chomp $eval_error;
        $tb->diag("Tried to require '$module'.\nError: $eval_error\n");
    }

    return $ok;
}
# _is_module_name( $string )
#
# Returns 1 when $string looks like a module name and 0 otherwise.  The
# check: after removing every "::" that sits between two word characters,
# what remains must start with an ASCII letter and consist of word
# characters only.
sub _is_module_name {
    my( $candidate ) = @_;

    # Module names start with a letter.
    # End with an alphanumeric.
    # The rest is an alphanumeric or ::
    ( my $flattened = $candidate ) =~ s/\b::\b//g;

    return 1 if $flattened =~ /^[a-zA-Z]\w*$/;
    return 0;
}
=back
=head2 Complex data structures
Not everything is a simple eq check or regex. There are times you
need to see if two data structures are equivalent. For these
instances Test::More provides a handful of useful functions.
B<NOTE> I'm not quite sure what will happen with filehandles.
=over 4
=item B<is_deeply>
is_deeply( $got, $expected, $test_name );
Similar to is(), except that if $got and $expected are references, it
does a deep comparison walking each data structure to see if they are
equivalent. If the two structures are different, it will display the
place where they start differing.
is_deeply() compares the dereferenced values of references, the
references themselves (except for their type) are ignored. This means
aspects such as blessing and ties are not considered "different".
is_deeply() currently has very limited handling of function reference
and globs. It merely checks if they have the same referent. This may
improve in the future.
L<Test::Differences> and L<Test::Deep> provide more in-depth functionality
along these lines.
=cut
# Package state shared by the deep-comparison routines: @Data_Stack holds
# the frames describing where a comparison currently is, %Refs_Seen tracks
# references already visited.
our( @Data_Stack, %Refs_Seen );

# Sentinel standing in for an element that is missing from a structure.
my $DNE = bless [], 'Does::Not::Exist';

# _dne( $thing ) - true when $thing is blessed into the same class as the
# "does not exist" sentinel.
sub _dne {
    my $class_of_arg = ref $_[0];
    return $class_of_arg eq ref $DNE;
}
## no critic (Subroutines::RequireArgUnpacking)
# is_deeply( $got, $expected, $test_name )
#
# Emits one test.  Two non-references are compared with the builder's
# is_eq(); a reference against a non-reference always fails; two references
# are compared with _deep_check().  On failure a "Structures begin
# differing at" diagnostic is printed.  A call with anything other than two
# or three arguments warns and records a failure.
# Returns the result of the builder's ok() / is_eq().
sub is_deeply {
    my $tb = Test::More->builder;

    my $argc = @_;
    if( $argc != 2 and $argc != 3 ) {
        my $msg = <<'WARNING';
is_deeply() takes two or three args, you gave %d.
This usually means you passed an array or hash instead
of a reference to it
WARNING
        chop $msg;    # clip off newline so carp() will put in line/file

        _carp( sprintf( $msg, $argc ) );

        return $tb->ok(0);
    }

    my( $got, $expected, $name ) = @_;

    $tb->_unoverload_str( \$expected, \$got );

    # Classify only after overloaded objects have been stringified above.
    my $got_is_plain      = !ref $got;
    my $expected_is_plain = !ref $expected;

    my $ok;
    if( $got_is_plain and $expected_is_plain ) {    # neither is a reference
        $ok = $tb->is_eq( $got, $expected, $name );
    }
    elsif( $got_is_plain xor $expected_is_plain ) {    # one's a reference, one isn't
        $ok = $tb->ok( 0, $name );
        $tb->diag( _format_stack( { vals => [ $got, $expected ] } ) );
    }
    else {                                          # both references
        local @Data_Stack = ();
        my $same = _deep_check( $got, $expected );
        $ok = $tb->ok( ( $same ? 1 : 0 ), $name );
        $tb->diag( _format_stack(@Data_Stack) ) unless $same;
    }

    return $ok;
}
# _format_stack( @frames )
#
# Builds the "Structures begin differing at:" diagnostic.  Each frame is a
# hash ref with the keys type, idx and vals.  HASH and ARRAY frames append
# a subscript to the access path, REF frames wrap the path in ${...}, and
# frames of any other type leave the path alone.  The two values shown are
# taken from the vals of the last frame.  Returns the text with every line
# prefixed by a space.
sub _format_stack {
    my(@Stack) = @_;

    my $path       = '$FOO';
    my $arrow_done = 0;
    for my $frame (@Stack) {
        my $kind = $frame->{type} || '';
        my $idx  = $frame->{'idx'};

        if( $kind eq 'REF' ) {
            $path = '${' . $path . '}';
            next;
        }

        my( $open, $close )
          = $kind eq 'HASH'  ? ( '{', '}' )
          : $kind eq 'ARRAY' ? ( '[', ']' )
          :                    ();
        next unless defined $open;

        # Only the first subscript needs the explicit arrow.
        $path .= "->" unless $arrow_done++;
        $path .= $open . $idx . $close;
    }

    my @vals = @{ $Stack[-1]{vals} }[ 0, 1 ];

    my $got_path      = $path;
    my $expected_path = $path;
    $got_path      =~ s/\$FOO/ \$got/;
    $expected_path =~ s/\$FOO/\$expected/;

    my @shown;
    for my $val (@vals) {
        push @shown,
            !defined $val ? 'undef'
          : _dne($val)    ? "Does not exist"
          : ref $val      ? "$val"
          :                 "'$val'";
    }

    my $out = join '',
      "Structures begin differing at:\n",
      "$got_path = $shown[0]\n",
      "$expected_path = $shown[1]\n";

    $out =~ s/^/ /msg;
    return $out;
}
# _type( $thing )
#
# Returns the first of ARRAY, HASH, REF, SCALAR, GLOB, CODE, Regexp for
# which UNIVERSAL::isa( $thing, ... ) is true, or '' when $thing is not a
# reference or matches none of them.
sub _type {
    my( $thing ) = @_;

    return '' unless ref $thing;

    my( $match ) = grep { UNIVERSAL::isa( $thing, $_ ) }
      qw(ARRAY HASH REF SCALAR GLOB CODE Regexp);

    return defined $match ? $match : '';
}
=back
=head2 Diagnostics
If you pick the right test function, you'll usually get a good idea of
what went wrong when it failed. But sometimes it doesn't work out
that way. So here we have ways for you to write your own diagnostic
messages which are safer than just C<print STDERR>.
=over 4
=item B<diag>
diag(@diagnostic_message);
Prints a diagnostic message which is guaranteed not to interfere with
test output. Like C<print> @diagnostic_message is simply concatenated
together.
Returns false, so as to preserve failure.
Handy for this sort of thing:
ok( grep(/foo/, @users), "There's a foo user" ) or
diag("Since there's no foo, check that /etc/bar is set up right");
which would produce:
not ok 42 - There's a foo user
# Failed test 'There's a foo user'
# in foo.t at line 52.
# Since there's no foo, check that /etc/bar is set up right.
You might remember C<ok() or diag()> with the mnemonic C<open() or
die()>.
B<NOTE> The exact formatting of the diagnostic output is still
changing, but it is guaranteed that whatever you throw at it it won't
interfere with the test.
=item B<note>
note(@diagnostic_message);
Like diag(), except the message will not be seen when the test is run
in a harness. It will only be visible in the verbose TAP stream.
Handy for putting in notes which might be useful for debugging, but
don't indicate a problem.
note("Tempfile is $tempfile");
=cut
# diag( @message ) - hands the message to the builder's diag() and returns
# its result.
sub diag {
    my $tb = Test::More->builder;
    return $tb->diag(@_);
}
# note( @message ) - hands the message to the builder's note() and returns
# its result.
sub note {
    my $tb = Test::More->builder;
    return $tb->note(@_);
}
=item B<explain>
my @dump = explain @diagnostic_message;
Will dump the contents of any references in a human readable format.
Usually you want to pass this into C<note> or C<diag>.
Handy for things like...
is_deeply($have, $want) || diag explain $have;
or
note explain \%args;
Some::Class->method(%args);
=cut
# explain( @things ) - hands the arguments to the builder's explain() and
# returns its result.
sub explain {
    my $tb = Test::More->builder;
    return $tb->explain(@_);
}
=back
=head2 Conditional tests
Sometimes running a test under certain conditions will cause the
test script to die. A certain function or method isn't implemented
(such as fork() on MacOS), some resource isn't available (like a
net connection) or a module isn't available. In these cases it's
necessary to skip tests, or declare that they are supposed to fail
but will work in the future (a todo test).
For more details on the mechanics of skip and todo tests see
L<Test::Harness>.
The way Test::More handles this is with a named block. Basically, a
block of tests which can be skipped over or made todo. It's best if I
just show you...
=over 4
=item B<SKIP: BLOCK>
SKIP: {
skip $why, $how_many if $condition;
...normal testing code goes here...
}
This declares a block of tests that might be skipped, $how_many tests
there are, $why and under what $condition to skip them. An example is
the easiest way to illustrate:
SKIP: {
eval { require HTML::Lint };
skip "HTML::Lint not installed", 2 if $@;
my $lint = new HTML::Lint;
isa_ok( $lint, "HTML::Lint" );
$lint->parse( $html );
is( $lint->errors, 0, "No errors found in HTML" );
}
If the user does not have HTML::Lint installed, the whole block of
code I<won't be run at all>. Test::More will output special ok's
which Test::Harness interprets as skipped, but passing, tests.
It's important that $how_many accurately reflects the number of tests
in the SKIP block so the # of tests run will match up with your plan.
If your plan is C<no_plan> $how_many is optional and will default to 1.
It's perfectly safe to nest SKIP blocks. Each SKIP block must have
the label C<SKIP>, or Test::More can't work its magic.
You don't skip tests which are failing because there's a bug in your
program, or for which you don't yet have code written. For that you
use TODO. Read on.
=cut
## no critic (Subroutines::RequireFinalReturn)
# skip( $why, $how_many )
#
# Records $how_many skipped tests with the reason $why and then leaves the
# enclosing block labelled SKIP.  This sub therefore never returns
# normally.
#
# A missing $how_many defaults to 1, with a warning unless the plan is
# 'no_plan'.  A $how_many containing a non-digit also falls back to 1 with
# a warning, which catches swapped arguments.
sub skip {
    my( $why, $how_many ) = @_;
    my $tb = Test::More->builder;

    unless( defined $how_many ) {
        # $how_many can only be avoided when no_plan is in use.
        _carp("skip() needs to know \$how_many tests are in the block")
          unless $tb->has_plan eq 'no_plan';
        $how_many = 1;
    }

    if( defined $how_many and $how_many =~ /\D/ ) {
        # The message is a single line; a line break inside the literal
        # would end up in the warning text.
        _carp(
            "skip() was passed a non-numeric number of tests. Did you get the arguments backwards?"
        );
        $how_many = 1;
    }

    for( 1 .. $how_many ) {
        $tb->skip($why);
    }

    # Jumping out of a sub with `last` is intentional here.
    no warnings 'exiting';
    last SKIP;
}
=item B<TODO: BLOCK>
TODO: {
local $TODO = $why if $condition;
...normal testing code goes here...
}
Declares a block of tests you expect to fail and $why. Perhaps it's
because you haven't fixed a bug or haven't finished a new feature:
TODO: {
local $TODO = "URI::Geller not finished";
my $card = "Eight of clubs";
is( URI::Geller->your_card, $card, 'Is THIS your card?' );
my $spoon;
URI::Geller->bend_spoon;
is( $spoon, 'bent', "Spoon bending, that's original" );
}
With a todo block, the tests inside are expected to fail. Test::More
will run the tests normally, but print out special flags indicating
they are "todo". Test::Harness will interpret failures as being ok.
Should anything succeed, it will report it as an unexpected success.
You then know the thing you had todo is done and can remove the
TODO flag.
The nice part about todo tests, as opposed to simply commenting out a
block of tests, is it's like having a programmatic todo list. You know
how much work is left to be done, you're aware of what bugs there are,
and you'll know immediately when they're fixed.
Once a todo test starts succeeding, simply move it outside the block.
When the block is empty, delete it.
B<NOTE>: TODO tests require a Test::Harness upgrade else it will
treat it as a normal failure. See L<CAVEATS and NOTES>.
=item B<todo_skip>
TODO: {
todo_skip $why, $how_many if $condition;
...normal testing code...
}
With todo tests, it's best to have the tests actually run. That way
you'll know when they start passing. Sometimes this isn't possible.
Often a failing test will cause the whole program to die or hang, even
inside an C<eval BLOCK> and using C<alarm>. In these extreme
cases you have no choice but to skip over the broken tests entirely.
The syntax and behavior is similar to a C<SKIP: BLOCK> except the
tests will be marked as failing but todo. Test::Harness will
interpret them as passing.
=cut
# todo_skip( $why, $how_many )
#
# Records $how_many tests through the builder's todo_skip() with the reason
# $why and then leaves the enclosing block labelled TODO, so this sub never
# returns normally.  A missing $how_many defaults to 1, with a warning
# unless the plan is 'no_plan'.
sub todo_skip {
    my $why      = shift;
    my $how_many = shift;
    my $tb       = Test::More->builder;

    if( !defined $how_many ) {
        # $how_many can only be avoided when no_plan is in use.
        _carp("todo_skip() needs to know \$how_many tests are in the block")
          unless $tb->has_plan eq 'no_plan';
        $how_many = 1;
    }

    $tb->todo_skip($why) for 1 .. $how_many;

    # Jumping out of a sub with `last` is intentional here.
    no warnings 'exiting';
    last TODO;
}
=item When do I use SKIP vs. TODO?
B<If it's something the user might not be able to do>, use SKIP.
This includes optional modules that aren't installed, running under
an OS that doesn't have some feature (like fork() or symlinks), or maybe
you need an Internet connection and one isn't available.
B<If it's something the programmer hasn't done yet>, use TODO. This
is for any code you haven't written yet, or bugs you have yet to fix,
but want to put tests in your testing script (always a good idea).
=back
=head2 Test control
=over 4
=item B<BAIL_OUT>
BAIL_OUT($reason);
Indicates to the harness that things are going so badly all testing
should terminate. This includes the running of any additional test scripts.
This is typically used when testing cannot continue such as a critical
module failing to compile or a necessary external utility not being
available such as a database connection failing.
The test will exit with 255.
For even better control look at L<Test::Most>.
=cut
# BAIL_OUT( $reason ) - passes the reason on to the builder's BAIL_OUT().
sub BAIL_OUT {
    my( $reason ) = @_;
    Test::More->builder->BAIL_OUT($reason);
}
=back
=head2 Discouraged comparison functions
The use of the following functions is discouraged as they are not
actually testing functions and produce no diagnostics to help figure
out what went wrong. They were written before is_deeply() existed
because I couldn't figure out how to display a useful diff of two
arbitrary data structures.
These functions are usually used inside an ok().
ok( eq_array(\@got, \@expected) );
C<is_deeply()> can do that better and with diagnostics.
is_deeply( \@got, \@expected );
They may be deprecated in future versions.
=over 4
=item B<eq_array>
my $is_eq = eq_array(\@got, \@expected);
Checks if two arrays are equivalent. This is a deep check, so
multi-level structures are handled correctly.
=cut
#'#
# eq_array( \@got, \@expected )
#
# Runs _deep_check() on the arguments with a fresh, localized @Data_Stack
# and returns its verdict.
sub eq_array {
    my @pair = @_;
    local @Data_Stack = ();
    return _deep_check(@pair);
}
# _eq_array( $a1, $a2 )
#
# Element-by-element deep comparison of two array references.  Warns and
# returns 0 unless both arguments are arrays.  Positions that exist in only
# one array are compared against the $DNE sentinel.  Comparison stops at
# the first differing element, whose frame is left on @Data_Stack for the
# diagnostics.  Returns true when the arrays are equivalent.
sub _eq_array {
    my( $a1, $a2 ) = @_;

    for my $candidate ( $a1, $a2 ) {
        next if _type($candidate) eq 'ARRAY';
        warn "eq_array passed a non-array ref";
        return 0;
    }

    # The very same array is trivially equal to itself.
    return 1 if $a1 eq $a2;

    my $last = $#$a1 > $#$a2 ? $#$a1 : $#$a2;

    my $ok = 1;
    for my $i ( 0 .. $last ) {
        my $e1 = $i <= $#$a1 ? $a1->[$i] : $DNE;
        my $e2 = $i <= $#$a2 ? $a2->[$i] : $DNE;

        push @Data_Stack, { type => 'ARRAY', idx => $i, vals => [ $e1, $e2 ] };
        $ok = _deep_check( $e1, $e2 );

        # Keep the frame of a mismatch on the stack.
        last unless $ok;
        pop @Data_Stack;
    }

    return $ok;
}
# _deep_check( $e1, $e2 )
#
# Core of the deep comparison.  Returns true when $e1 and $e2 are
# considered equivalent, false otherwise.  Arrays and hashes are handed to
# _eq_array() / _eq_hash(); references to scalars and to references are
# followed recursively.  When a difference is found, the frames pushed on
# @Data_Stack are left in place so the caller can report where it occurred.
sub _deep_check {
my( $e1, $e2 ) = @_;
my $tb = Test::More->builder;
my $ok = 0;
# Effectively turn %Refs_Seen into a stack. This prevents the same
# reference used twice (such as [\$a, \$a]) from being considered
# circular.
local %Refs_Seen = %Refs_Seen;
{
# Quiet uninitialized value warnings when comparing undefs.
no warnings 'uninitialized';
$tb->_unoverload_str( \$e1, \$e2 );
# Either they're both references or both not.
my $same_ref = !( !ref $e1 xor !ref $e2 );
my $not_ref = ( !ref $e1 and !ref $e2 );
if( defined $e1 xor defined $e2 ) {
$ok = 0;
}
elsif( !defined $e1 and !defined $e2 ) {
# Shortcut if they're both undefined.
$ok = 1;
}
elsif( _dne($e1) xor _dne($e2) ) {
# Exactly one side is the "does not exist" sentinel.
$ok = 0;
}
elsif( $same_ref and( $e1 eq $e2 ) ) {
# Equal strings, or the very same reference on both sides.
$ok = 1;
}
elsif($not_ref) {
# Two plain values that did not compare equal above.
push @Data_Stack, { type => '', vals => [ $e1, $e2 ] };
$ok = 0;
}
else {
# $e1 was already visited on this path: it matches only if it was
# paired with the same $e2 back then.  Otherwise remember the pairing.
if( $Refs_Seen{$e1} ) {
return $Refs_Seen{$e1} eq $e2;
}
else {
$Refs_Seen{$e1} = "$e2";
}
my $type = _type($e1);
$type = 'DIFFERENT' unless _type($e2) eq $type;
if( $type eq 'DIFFERENT' ) {
push @Data_Stack, { type => $type, vals => [ $e1, $e2 ] };
$ok = 0;
}
elsif( $type eq 'ARRAY' ) {
$ok = _eq_array( $e1, $e2 );
}
elsif( $type eq 'HASH' ) {
$ok = _eq_hash( $e1, $e2 );
}
elsif( $type eq 'REF' ) {
push @Data_Stack, { type => $type, vals => [ $e1, $e2 ] };
$ok = _deep_check( $$e1, $$e2 );
pop @Data_Stack if $ok;
}
elsif( $type eq 'SCALAR' ) {
# Scalar references are recorded as 'REF' frames as well.
push @Data_Stack, { type => 'REF', vals => [ $e1, $e2 ] };
$ok = _deep_check( $$e1, $$e2 );
pop @Data_Stack if $ok;
}
elsif($type) {
# Any other known type (GLOB, CODE, Regexp) that was not the same
# referent above counts as different.
push @Data_Stack, { type => $type, vals => [ $e1, $e2 ] };
$ok = 0;
}
else {
_whoa( 1, "No type in _deep_check" );
}
}
}
return $ok;
}
# _whoa( $check, $desc )
#
# Internal sanity check: dies with a "WHOA!" message built from $desc when
# $check is true, and does nothing otherwise.
sub _whoa {
    my( $check, $desc ) = @_;
    die <<"WHOA" if $check;
WHOA! $desc
This should never happen! Please contact the author immediately!
WHOA
}
=item B<eq_hash>
my $is_eq = eq_hash(\%got, \%expected);
Determines if the two hashes contain the same keys and values. This
is a deep check.
=cut
# eq_hash( \%got, \%expected )
#
# Runs _deep_check() on the arguments with a fresh, localized @Data_Stack
# and returns its verdict.
sub eq_hash {
    my @pair = @_;
    local @Data_Stack = ();
    return _deep_check(@pair);
}
# _eq_hash( $a1, $a2 )
#
# Key-by-key deep comparison of two hash references.  Warns and returns 0
# unless both arguments are hashes.  The keys of the hash with more entries
# drive the loop ($a2 on a tie); a key missing from one side is compared
# against the $DNE sentinel.  Comparison stops at the first differing
# value, whose frame is left on @Data_Stack for the diagnostics.  Returns
# true when the hashes are equivalent.
sub _eq_hash {
    my( $a1, $a2 ) = @_;

    for my $candidate ( $a1, $a2 ) {
        next if _type($candidate) eq 'HASH';
        warn "eq_hash passed a non-hash ref";
        return 0;
    }

    # The very same hash is trivially equal to itself.
    return 1 if $a1 eq $a2;

    my $bigger = keys %$a1 > keys %$a2 ? $a1 : $a2;

    my $ok = 1;
    for my $key ( keys %$bigger ) {
        my $e1 = exists $a1->{$key} ? $a1->{$key} : $DNE;
        my $e2 = exists $a2->{$key} ? $a2->{$key} : $DNE;

        push @Data_Stack, { type => 'HASH', idx => $key, vals => [ $e1, $e2 ] };
        $ok = _deep_check( $e1, $e2 );

        # Keep the frame of a mismatch on the stack.
        last unless $ok;
        pop @Data_Stack;
    }

    return $ok;
}
=item B<eq_set>
my $is_eq = eq_set(\@got, \@expected);
Similar to eq_array(), except the order of the elements is B<not>
important. This is a deep check, but the irrelevancy of order only
applies to the top level.
ok( eq_set(\@got, \@expected) );
Is better written:
is_deeply( [sort @got], [sort @expected] );
B<NOTE> By historical accident, this is not a true set comparison.
While the order of elements does not matter, duplicate elements do.
B<NOTE> eq_set() does not know how to deal with references at the top
level. The following is an example of a comparison which might not work:
eq_set([\1, \2], [\2, \1]);
L<Test::Deep> contains much better set comparison functions.
=cut
# eq_set( \@got, \@expected )
#
# Order-insensitive comparison of two arrays.  Returns 0 straight away when
# the element counts differ.  Otherwise both lists are normalised (the
# references first, in their original order, followed by the sorted
# non-references) and the results are compared with eq_array().
sub eq_set {
    my( $a1, $a2 ) = @_;
    return 0 unless @$a1 == @$a2;

    no warnings 'uninitialized';

    # It really doesn't matter how we sort them, as long as both arrays are
    # sorted with the same algorithm.
    #
    # Ensure that references are not accidentally treated the same as a
    # string containing the reference.
    #
    # Have to inline the sort routine due to a threading/sort bug.
    # See [rt.cpan.org 6782]
    #
    # I don't know how references would be sorted so we just don't sort
    # them. This means eq_set doesn't really work with refs.
    my @normalized = map {
        my $elems = $_;
        [ grep( ref, @$elems ), sort( grep( !ref, @$elems ) ) ];
    } $a1, $a2;

    return eq_array(@normalized);
}
=back
=head2 Extending and Embedding Test::More
Sometimes the Test::More interface isn't quite enough. Fortunately,
Test::More is built on top of Test::Builder which provides a single,
unified backend for any test library to use. This means two test
libraries which both use Test::Builder B<can be used together in the
same program>.
If you simply want to do a little tweaking of how the tests behave,
you can access the underlying Test::Builder object like so:
=over 4
=item B<builder>
my $test_builder = Test::More->builder;
Returns the Test::Builder object underlying Test::More for you to play
with.
=back
=head1 EXIT CODES
If all your tests passed, Test::Builder will exit with zero (which is
normal). If anything failed it will exit with how many failed. If
you run less (or more) tests than you planned, the missing (or extras)
will be considered failures. If no tests were ever run Test::Builder
will throw a warning and exit with 255. If the test died, even after
having successfully completed all its tests, it will still be
considered a failure and will exit with 255.
So the exit codes are...
0 all tests successful
255 test died or all passed but wrong # of tests run
any other number how many failed (including missing or extras)
If you fail more than 254 tests, it will be reported as 254.
B<NOTE> This behavior may go away in future versions.
=head1 CAVEATS and NOTES
=over 4
=item Backwards compatibility
Test::More works with Perls as old as 5.6.0.
=item utf8 / "Wide character in print"
If you use utf8 or other non-ASCII characters with Test::More you
might get a "Wide character in print" warning. Using C<binmode
STDOUT, ":utf8"> will not fix it. Test::Builder (which powers
Test::More) duplicates STDOUT and STDERR. So any changes to them,
including changing their output disciplines, will not be seen by
Test::More.
The work around is to change the filehandles used by Test::Builder
directly.
my $builder = Test::More->builder;
binmode $builder->output, ":utf8";
binmode $builder->failure_output, ":utf8";
binmode $builder->todo_output, ":utf8";
=item Overloaded objects
String overloaded objects are compared B<as strings> (or in cmp_ok()'s
case, strings or numbers as appropriate to the comparison op). This
prevents Test::More from piercing an object's interface allowing
better blackbox testing. So if a function starts returning overloaded
objects instead of bare strings your tests won't notice the
difference. This is good.
However, it does mean that functions like is_deeply() cannot be used to
test the internals of string overloaded objects. In this case I would
suggest L<Test::Deep> which contains more flexible testing functions for
complex data structures.
=item Threads
Test::More will only be aware of threads if "use threads" has been done
I<before> Test::More is loaded. This is ok:
use threads;
use Test::More;
This may cause problems:
use Test::More;
use threads;
5.8.1 and above are supported. Anything below that has too many bugs.
=item Test::Harness upgrade
no_plan, todo and done_testing() depend on new Test::Harness features
and fixes. If you're going to distribute tests that use no_plan or
todo your end-users will have to upgrade Test::Harness to the latest
one on CPAN. If you avoid no_plan and TODO tests, the stock
Test::Harness will work fine.
Installing Test::More should also upgrade Test::Harness.
=back
=head1 HISTORY
This is a case of convergent evolution with Joshua Pritikin's Test
module. I was largely unaware of its existence when I'd first
written my own ok() routines. This module exists because I can't
figure out how to easily wedge test names into Test's interface (along
with a few other problems).
The goal here is to have a testing utility that's simple to learn,
quick to use and difficult to trip yourself up with while still
providing more flexibility than the existing Test.pm. As such, the
names of the most common routines are kept tiny, special cases and
magic side-effects are kept to a minimum. WYSIWYG.
=head1 SEE ALSO
L<Test::Simple> if all this confuses you and you just want to write
some tests. You can upgrade to Test::More later (it's forward
compatible).
L<Test::Harness> is the test runner and output interpreter for Perl.
It's the thing that powers C<make test> and where the C<prove> utility
comes from.
L<Test::Legacy> tests written with Test.pm, the original testing
module, do not play well with other testing libraries. Test::Legacy
emulates the Test.pm interface and does play well with others.
L<Test::Differences> for more ways to test complex data structures.
And it plays well with Test::More.
L<Test::Class> is like xUnit but more perlish.
L<Test::Deep> gives you more powerful complex data structure testing.
L<Test::Inline> shows the idea of embedded testing.
L<Bundle::Test> installs a whole bunch of useful test modules.
=head1 AUTHORS
Michael G Schwern E<lt>schwern@pobox.comE<gt> with much inspiration
from Joshua Pritikin's Test module and lots of help from Barrie
Slaymaker, Tony Bowden, blackstar.co.uk, chromatic, Fergal Daly and
the perl-qa gang.
=head1 BUGS
See F<http://rt.cpan.org> to report and view bugs.
=head1 SOURCE
The source code repository for Test::More can be found at
F<http://github.com/schwern/test-more/>.
=head1 COPYRIGHT
Copyright 2001-2008 by Michael G Schwern E<lt>schwern@pobox.comE<gt>.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
See F<http://www.perl.com/perl/misc/Artistic.html>
=cut
1;
package Test::Builder::Module;
use strict;
use Test::Builder;
require Exporter;
our @ISA = qw(Exporter);
# Module version. The string is run through eval to turn it into a plain
# number; the stringy eval is deliberate, hence the no-critic marker.
our $VERSION = '0.92';
$VERSION = eval $VERSION;    ## no critic (BuiltinFunctions::ProhibitStringyEval)
# Private stand-in for Exporter's export_to_level(), which 5.004's Exporter
# lacks.  Called as a method: $pkg->$_export_to_level( $level, $ignored, @symbols ).
# Exports @symbols from $pkg into the package found $level frames up the
# call stack (as seen from inside this sub).
my $_export_to_level = sub {
    # The third argument is redundant and deliberately discarded.
    my( $pkg, $level, undef, @symbols ) = @_;

    my $callpkg = caller($level);
    $pkg->export( $callpkg, @symbols );
};
=head1 NAME
Test::Builder::Module - Base class for test modules
=head1 SYNOPSIS
# Emulates Test::Simple
package Your::Module;
my $CLASS = __PACKAGE__;
use base 'Test::Builder::Module';
@EXPORT = qw(ok);
sub ok ($;$) {
my $tb = $CLASS->builder;
return $tb->ok(@_);
}

1;
=head1 DESCRIPTION
This is a superclass for Test::Builder-based modules. It provides a
handful of common functionality and a method of getting at the underlying
Test::Builder object.
=head2 Importing
Test::Builder::Module is a subclass of Exporter which means your
module is also a subclass of Exporter. @EXPORT, @EXPORT_OK, etc...
all act normally.
A few methods are provided to do the C<< use Your::Module tests => 23 >> part
for you.
=head3 import
Test::Builder::Module provides an import() method which acts in the
same basic way as Test::More's, setting the plan and controlling
exporting of functions and variables. This allows your module to set
the plan independent of Test::More.
All arguments passed to import() are passed onto
C<< Your::Module->builder->plan() >> with the exception of
C<< import => [qw(things to import)] >>.
use Your::Module import => [qw(this that)], tests => 23;
says to import the functions this() and that() as well as set the plan
to be 23 tests.
import() also sets the exported_to() attribute of your builder to be
the caller of the import() function.
Additional behaviors can be added to your import() method by overriding
import_extra().
=cut
# import() - invoked by "use Your::Module LIST".
#
# Records the calling package on the builder, gives import_extra() a chance
# to edit the argument list, pulls out any "import => [...]" pairs, hands the
# remaining arguments to the builder's plan(), and finally exports the
# requested symbols into the caller.
sub import {
    my $class = shift;

    # Loading the base class itself must not plan or export anything.
    if( $class eq 'Test::Builder::Module' ) {
        return 1;
    }

    my $caller = caller;
    my $test   = $class->builder;
    $test->exported_to($caller);

    # Both hooks receive a reference to @_ so they can strip arguments
    # that plan() must not see.
    $class->import_extra( \@_ );
    my @imports = $class->_strip_imports( \@_ );

    $test->plan(@_);

    # Level 1: export into whoever called this import().
    $class->$_export_to_level( 1, $class, @imports );
}
# _strip_imports( $class, \@args )
#
# Removes every "import => [ ... ]" pair from @args in place and returns the
# flattened list of symbols named by those pairs.  All other arguments
# (including undefined ones) are left in @args in their original order.
sub _strip_imports {
    my( $class, $args ) = @_;

    my( @wanted, @passthrough );
    for( my $pos = 0; $pos <= $#{$args}; $pos++ ) {
        my $arg = $args->[$pos];

        unless( defined $arg and $arg eq 'import' ) {
            push @passthrough, $arg;
            next;
        }

        # The element following 'import' is the array ref of symbols;
        # step onto it so the loop increment moves past the whole pair.
        $pos++;
        push @wanted, @{ $args->[$pos] };
    }

    @$args = @passthrough;
    return @wanted;
}
=head3 import_extra
Your::Module->import_extra(\@import_args);
import_extra() is called by import(). It provides an opportunity for you
to add behaviors to your module based on its import list.
Any extra arguments which shouldn't be passed on to plan() should be
stripped off by this method.
See Test::More for an example of its use.
B<NOTE> This mechanism is I<VERY ALPHA AND LIKELY TO CHANGE> as it
feels like a bit of an ugly hack in its current form.
=cut
# Default hook called by import() with a reference to its argument list.
# Does nothing and returns nothing; subclasses override it.
sub import_extra {
    return;
}
=head2 Builder
Test::Builder::Module provides some methods of getting at the underlying
Test::Builder object.
=head3 builder
my $builder = Your::Class->builder;
This method returns the Test::Builder object associated with Your::Class.
It is not a constructor so you can call it as often as you like.
This is the preferred way to get the Test::Builder object. You should
I<not> get it via C<< Test::Builder->new >> as was previously
recommended.
The object returned by builder() may change at runtime so you should
call builder() inside each function rather than store it in a global.
sub ok {
my $builder = Your::Class->builder;
return $builder->ok(@_);
}
=cut
# Class method: return the Test::Builder object that this module's test
# functions should report through.  It delegates to Test::Builder->new on
# every call and caches nothing here, so callers are expected to call it
# each time they need the object rather than storing the result.
sub builder {
return Test::Builder->new;
}
1;
package Test::Simple;
use 5.004;
use strict;
# Module version. The string is run through eval to turn it into a plain
# number; the stringy eval is deliberate, hence the no-critic marker.
our $VERSION = '0.92';
$VERSION = eval $VERSION;    ## no critic (BuiltinFunctions::ProhibitStringyEval)
use Test::Builder::Module;
our @ISA = qw(Test::Builder::Module);
our @EXPORT = qw(ok);
my $CLASS = __PACKAGE__;
=head1 NAME
Test::Simple - Basic utilities for writing tests.
=head1 SYNOPSIS
use Test::Simple tests => 1;
ok( $foo eq $bar, 'foo is bar' );
=head1 DESCRIPTION
** If you are unfamiliar with testing B<read Test::Tutorial> first! **
This is an extremely simple, extremely basic module for writing tests
suitable for CPAN modules and other pursuits. If you wish to do more
complicated testing, use the Test::More module (a drop-in replacement
for this one).
The basic unit of Perl testing is the ok. For each thing you want to
test your program will print out an "ok" or "not ok" to indicate pass
or fail. You do this with the ok() function (see below).
The only other constraint is you must pre-declare how many tests you
plan to run. This is in case something goes horribly wrong during the
test and your test program aborts, or skips a test or whatever. You
do this like so:
use Test::Simple tests => 23;
You must have a plan.
=over 4
=item B<ok>
ok( $foo eq $bar, $name );
ok( $foo eq $bar );
ok() is given an expression (in this case C<$foo eq $bar>). If it's
true, the test passed. If it's false, it didn't. That's about it.
ok() prints out either "ok" or "not ok" along with a test number (it
keeps track of that for you).
# This produces "ok 1 - Hell not yet frozen over" (or not ok)
ok( get_temperature($hell) > 0, 'Hell not yet frozen over' );
If you provide a $name, that will be printed along with the "ok/not
ok" to make it easier to find your test when it fails (just search for
the name). It also makes it easier for the next guy to understand
what your test is for. It's highly recommended you use test names.
All tests are run in scalar context. So this:
ok( @stuff, 'I have some stuff' );
will do what you mean (fail if stuff is empty)
=cut
# ok( $test, $name ) - the single exported test function.  Fetches the
# builder for this class and passes both arguments straight through to its
# ok() method, returning whatever that returns.  The ($;$) prototype forces
# the first argument into scalar context.
sub ok ($;$) {    ## no critic (Subroutines::ProhibitSubroutinePrototypes)
    my $tb = $CLASS->builder;
    return $tb->ok(@_);
}
=back
Test::Simple will start by printing number of tests run in the form
"1..M" (so "1..5" means you're going to run 5 tests). This strange
format lets Test::Harness know how many tests you plan on running in
case something goes horribly wrong.
If all your tests passed, Test::Simple will exit with zero (which is
normal). If anything failed it will exit with how many failed. If
you run less (or more) tests than you planned, the missing (or extras)
will be considered failures. If no tests were ever run Test::Simple
will throw a warning and exit with 255. If the test died, even after
having successfully completed all its tests, it will still be
considered a failure and will exit with 255.
So the exit codes are...
0 all tests successful
255 test died or all passed but wrong # of tests run
any other number how many failed (including missing or extras)
If you fail more than 254 tests, it will be reported as 254.
This module is by no means trying to be a complete testing system.
It's just to get you started. Once you're off the ground it's
recommended you look at L<Test::More>.
=head1 EXAMPLE
Here's an example of a simple .t file for the fictional Film module.
use Test::Simple tests => 5;
use Film; # What you're testing.
my $btaste = Film->new({ Title => 'Bad Taste',
Director => 'Peter Jackson',
Rating => 'R',
NumExplodingSheep => 1
});
ok( defined($btaste) && ref $btaste eq 'Film', 'new() works' );
ok( $btaste->Title eq 'Bad Taste', 'Title() get' );
ok( $btaste->Director eq 'Peter Jackson', 'Director() get' );
ok( $btaste->Rating eq 'R', 'Rating() get' );
ok( $btaste->NumExplodingSheep == 1, 'NumExplodingSheep() get' );
It will produce output like this:
1..5
ok 1 - new() works
ok 2 - Title() get
ok 3 - Director() get
not ok 4 - Rating() get
# Failed test 'Rating() get'
# in t/film.t at line 14.
ok 5 - NumExplodingSheep() get
# Looks like you failed 1 tests of 5
Indicating the Film::Rating() method is broken.
=head1 CAVEATS
Test::Simple will only report a maximum of 254 failures in its exit
code. If this is a problem, you probably have a huge test script.
Split it into multiple files. (Otherwise blame the Unix folks for
using an unsigned short integer as the exit status).
Because VMS's exit codes are much, much different than the rest of the
universe, and perl does horrible mangling to them that gets in my way,
it works like this on VMS.
0 SS$_NORMAL all tests successful
4 SS$_ABORT something went wrong
Unfortunately, I can't differentiate any further.
=head1 NOTES
Test::Simple is B<explicitly> tested all the way back to perl 5.004.
Test::Simple is thread-safe in perl 5.8.0 and up.
=head1 HISTORY
This module was conceived while talking with Tony Bowden in his
kitchen one night about the problems I was having writing some really
complicated feature into the new Testing module. He observed that the
main problem is not dealing with these edge cases but that people hate
to write tests B<at all>. What was needed was a dead simple module
that took all the hard work out of testing and was really, really easy
to learn. Paul Johnson simultaneously had this idea (unfortunately,
he wasn't in Tony's kitchen). This is it.
=head1 SEE ALSO
=over 4
=item L<Test::More>
More testing functions! Once you outgrow Test::Simple, look at
Test::More. Test::Simple is 100% forward compatible with Test::More
(i.e. you can just use Test::More instead of Test::Simple in your
programs and things will still work).
=back
Look in Test::More's SEE ALSO for more testing modules.
=head1 AUTHORS
Idea by Tony Bowden and Paul Johnson, code by Michael G Schwern
E<lt>schwern@pobox.comE<gt>, wardrobe by Calvin Klein.
=head1 COPYRIGHT
Copyright 2001-2008 by Michael G Schwern E<lt>schwern@pobox.comE<gt>.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
See F<http://www.perl.com/perl/misc/Artistic.html>
=cut
1;
=head1 NAME
Test::Tutorial - A tutorial about writing really basic tests
=head1 DESCRIPTION
I<AHHHHHHH!!!! NOT TESTING! Anything but testing!
Beat me, whip me, send me to Detroit, but don't make
me write tests!>
I<*sob*>
I<Besides, I don't know how to write the damned things.>
Is this you? Is writing tests right up there with writing
documentation and having your fingernails pulled out? Did you open up
a test and read
######## We start with some black magic
and decide that's quite enough for you?
It's ok. That's all gone now. We've done all the black magic for
you. And here are the tricks...
=head2 Nuts and bolts of testing.
Here's the most basic test program.
#!/usr/bin/perl -w
print "1..1\n";
print 1 + 1 == 2 ? "ok 1\n" : "not ok 1\n";
since 1 + 1 is 2, it prints:
1..1
ok 1
What this says is: C<1..1> "I'm going to run one test." [1] C<ok 1>
"The first test passed". And that's about all the magic there is to
testing. Your basic unit of testing is the I<ok>. For each thing you
test, an C<ok> is printed. Simple. B<Test::Harness> interprets your test
results to determine if you succeeded or failed (more on that later).
Writing all these print statements rapidly gets tedious. Fortunately,
there's B<Test::Simple>. It has one function, C<ok()>.
#!/usr/bin/perl -w
use Test::Simple tests => 1;
ok( 1 + 1 == 2 );
and that does the same thing as the code above. C<ok()> is the backbone
of Perl testing, and we'll be using it instead of roll-your-own from
here on. If C<ok()> gets a true value, the test passes. False, it
fails.
#!/usr/bin/perl -w
use Test::Simple tests => 2;
ok( 1 + 1 == 2 );
ok( 2 + 2 == 5 );
from that comes
1..2
ok 1
not ok 2
# Failed test (test.pl at line 5)
# Looks like you failed 1 tests of 2.
C<1..2> "I'm going to run two tests." This number is used to ensure
your test program ran all the way through and didn't die or skip some
tests. C<ok 1> "The first test passed." C<not ok 2> "The second test
failed". Test::Simple helpfully prints out some extra commentary about
your tests.
It's not scary. Come, hold my hand. We're going to give an example
of testing a module. For our example, we'll be testing a date
library, B<Date::ICal>. It's on CPAN, so download a copy and follow
along. [2]
=head2 Where to start?
This is the hardest part of testing, where do you start? People often
get overwhelmed at the apparent enormity of the task of testing a
whole module. Best place to start is at the beginning. Date::ICal is
an object-oriented module, and that means you start by making an
object. So we test C<new()>.
#!/usr/bin/perl -w
use Test::Simple tests => 2;
use Date::ICal;
my $ical = Date::ICal->new; # create an object
ok( defined $ical ); # check that we got something
ok( $ical->isa('Date::ICal') ); # and it's the right class
run that and you should get:
1..2
ok 1
ok 2
congratulations, you've written your first useful test.
=head2 Names
That output isn't terribly descriptive, is it? When you have two
tests you can figure out which one is #2, but what if you have 102?
Each test can be given a little descriptive name as the second
argument to C<ok()>.
use Test::Simple tests => 2;
ok( defined $ical, 'new() returned something' );
ok( $ical->isa('Date::ICal'), " and it's the right class" );
So now you'd see...
1..2
ok 1 - new() returned something
ok 2 - and it's the right class
=head2 Test the manual
Simplest way to build up a decent testing suite is to just test what
the manual says it does. [3] Let's pull something out of the
L<Date::ICal/SYNOPSIS> and test that all its bits work.
#!/usr/bin/perl -w
use Test::Simple tests => 8;
use Date::ICal;
$ical = Date::ICal->new( year => 1964, month => 10, day => 16,
hour => 16, min => 12, sec => 47,
tz => '0530' );
ok( defined $ical, 'new() returned something' );
ok( $ical->isa('Date::ICal'), " and it's the right class" );
ok( $ical->sec == 47, ' sec()' );
ok( $ical->min == 12, ' min()' );
ok( $ical->hour == 16, ' hour()' );
ok( $ical->day == 17, ' day()' );
ok( $ical->month == 10, ' month()' );
ok( $ical->year == 1964, ' year()' );
run that and you get:
1..8
ok 1 - new() returned something
ok 2 - and it's the right class
ok 3 - sec()
ok 4 - min()
ok 5 - hour()
not ok 6 - day()
# Failed test (- at line 16)
ok 7 - month()
ok 8 - year()
# Looks like you failed 1 tests of 8.
Whoops, a failure! [4] Test::Simple helpfully lets us know on what line
the failure occurred, but not much else. We were supposed to get 17,
but we didn't. What did we get?? Dunno. We'll have to re-run the
test in the debugger or throw in some print statements to find out.
Instead, we'll switch from B<Test::Simple> to B<Test::More>. B<Test::More>
does everything B<Test::Simple> does, and more! In fact, Test::More does
things I<exactly> the way Test::Simple does. You can literally swap
Test::Simple out and put Test::More in its place. That's just what
we're going to do.
Test::More does more than Test::Simple. The most important difference
at this point is it provides more informative ways to say "ok".
Although you can write almost any test with a generic C<ok()>, it
can't tell you what went wrong. Instead, we'll use the C<is()>
function, which lets us declare that something is supposed to be the
same as something else:
#!/usr/bin/perl -w
use Test::More tests => 8;
use Date::ICal;
$ical = Date::ICal->new( year => 1964, month => 10, day => 16,
hour => 16, min => 12, sec => 47,
tz => '0530' );
ok( defined $ical, 'new() returned something' );
ok( $ical->isa('Date::ICal'), " and it's the right class" );
is( $ical->sec, 47, ' sec()' );
is( $ical->min, 12, ' min()' );
is( $ical->hour, 16, ' hour()' );
is( $ical->day, 17, ' day()' );
is( $ical->month, 10, ' month()' );
is( $ical->year, 1964, ' year()' );
"Is C<$ical-E<gt>sec> 47?" "Is C<$ical-E<gt>min> 12?" With C<is()> in place,
you get some more information
1..8
ok 1 - new() returned something
ok 2 - and it's the right class
ok 3 - sec()
ok 4 - min()
ok 5 - hour()
not ok 6 - day()
# Failed test (- at line 16)
# got: '16'
# expected: '17'
ok 7 - month()
ok 8 - year()
# Looks like you failed 1 tests of 8.
letting us know that C<$ical-E<gt>day> returned 16, but we expected 17. A
quick check shows that the code is working fine, we made a mistake
when writing up the tests. Just change it to:
is( $ical->day, 16, ' day()' );
and everything works.
So any time you're doing a "this equals that" sort of test, use C<is()>.
It even works on arrays. The test is always in scalar context, so you
can test how many elements are in a list this way. [5]
is( @foo, 5, 'foo has 5 elements' );
=head2 Sometimes the tests are wrong
Which brings us to a very important lesson. Code has bugs. Tests are
code. Ergo, tests have bugs. A failing test could mean a bug in the
code, but don't discount the possibility that the test is wrong.
On the flip side, don't be tempted to prematurely declare a test
incorrect just because you're having trouble finding the bug.
Invalidating a test isn't something to be taken lightly, and don't use
it as a cop out to avoid work.
=head2 Testing lots of values
We're going to be wanting to test a lot of dates here, trying to trick
the code with lots of different edge cases. Does it work before 1970?
After 2038? Before 1904? Do years after 10,000 give it trouble?
Does it get leap years right? We could keep repeating the code above,
or we could set up a little try/expect loop.
use Test::More tests => 32;
use Date::ICal;
my %ICal_Dates = (
# An ICal string And the year, month, date
# hour, minute and second we expect.
'19971024T120000' => # from the docs.
[ 1997, 10, 24, 12, 0, 0 ],
'20390123T232832' => # after the Unix epoch
[ 2039, 1, 23, 23, 28, 32 ],
'19671225T000000' => # before the Unix epoch
[ 1967, 12, 25, 0, 0, 0 ],
'18990505T232323' => # before the MacOS epoch
[ 1899, 5, 5, 23, 23, 23 ],
);
while( my($ical_str, $expect) = each %ICal_Dates ) {
my $ical = Date::ICal->new( ical => $ical_str );
ok( defined $ical, "new(ical => '$ical_str')" );
ok( $ical->isa('Date::ICal'), " and it's the right class" );
is( $ical->year, $expect->[0], ' year()' );
is( $ical->month, $expect->[1], ' month()' );
is( $ical->day, $expect->[2], ' day()' );
is( $ical->hour, $expect->[3], ' hour()' );
is( $ical->min, $expect->[4], ' min()' );
is( $ical->sec, $expect->[5], ' sec()' );
}
So now we can test bunches of dates by just adding them to
C<%ICal_Dates>. Now that it's less work to test with more dates, you'll
be inclined to just throw more in as you think of them.
Only problem is, every time we add to that we have to keep adjusting
the C<use Test::More tests =E<gt> ##> line. That can rapidly get
annoying. There's two ways to make this work better.
First, we can calculate the plan dynamically using the C<plan()>
function.
use Test::More;
use Date::ICal;
my %ICal_Dates = (
...same as before...
);
# For each key in the hash we're running 8 tests.
plan tests => keys %ICal_Dates * 8;
Or to be even more flexible, we use C<no_plan>. This means we're just
running some tests, don't know how many. [6]
use Test::More 'no_plan'; # instead of tests => 32
now we can just add tests and not have to do all sorts of math to
figure out how many we're running.
=head2 Informative names
Take a look at this line here
ok( defined $ical, "new(ical => '$ical_str')" );
we've added more detail about what we're testing and the ICal string
itself we're trying out to the name. So you get results like:
ok 25 - new(ical => '19971024T120000')
ok 26 - and it's the right class
ok 27 - year()
ok 28 - month()
ok 29 - day()
ok 30 - hour()
ok 31 - min()
ok 32 - sec()
if something in there fails, you'll know which one it was and that
will make tracking down the problem easier. So try to put a bit of
debugging information into the test names.
Describe what the tests test, to make debugging a failed test easier
for you or for the next person who runs your test.
=head2 Skipping tests
Poking around in the existing Date::ICal tests, I found this in
F<t/01sanity.t> [7]
#!/usr/bin/perl -w
use Test::More tests => 7;
use Date::ICal;
# Make sure epoch time is being handled sanely.
my $t1 = Date::ICal->new( epoch => 0 );
is( $t1->epoch, 0, "Epoch time of 0" );
# XXX This will only work on unix systems.
is( $t1->ical, '19700101Z', " epoch to ical" );
is( $t1->year, 1970, " year()" );
is( $t1->month, 1, " month()" );
is( $t1->day, 1, " day()" );
# like the tests above, but starting with ical instead of epoch
my $t2 = Date::ICal->new( ical => '19700101Z' );
is( $t2->ical, '19700101Z', "Start of epoch in ICal notation" );
is( $t2->epoch, 0, " and back to ICal" );
The beginning of the epoch is different on most non-Unix operating
systems [8]. Even though Perl smooths out the differences for the most
part, certain ports do it differently. MacPerl is one off the top of
my head. [9] We I<know> this will never work on MacOS. So rather than
just putting a comment in the test, we can explicitly say it's never
going to work and skip the test.
use Test::More tests => 7;
use Date::ICal;
# Make sure epoch time is being handled sanely.
my $t1 = Date::ICal->new( epoch => 0 );
is( $t1->epoch, 0, "Epoch time of 0" );
SKIP: {
skip('epoch to ICal not working on MacOS', 6)
if $^O eq 'MacOS';
is( $t1->ical, '19700101Z', " epoch to ical" );
is( $t1->year, 1970, " year()" );
is( $t1->month, 1, " month()" );
is( $t1->day, 1, " day()" );
# like the tests above, but starting with ical instead of epoch
my $t2 = Date::ICal->new( ical => '19700101Z' );
is( $t2->ical, '19700101Z', "Start of epoch in ICal notation" );
is( $t2->epoch, 0, " and back to ICal" );
}
A little bit of magic happens here. When running on anything but
MacOS, all the tests run normally. But when on MacOS, C<skip()> causes
the entire contents of the SKIP block to be jumped over. It's never
run. Instead, it prints special output that tells Test::Harness that
the tests have been skipped.
1..7
ok 1 - Epoch time of 0
ok 2 # skip epoch to ICal not working on MacOS
ok 3 # skip epoch to ICal not working on MacOS
ok 4 # skip epoch to ICal not working on MacOS
ok 5 # skip epoch to ICal not working on MacOS
ok 6 # skip epoch to ICal not working on MacOS
ok 7 # skip epoch to ICal not working on MacOS
This means your tests won't fail on MacOS. This means fewer emails
from MacPerl users telling you about failing tests that you know will
never work. You've got to be careful with skip tests. These are for
tests which don't work and I<never will>. It is not for skipping
genuine bugs (we'll get to that in a moment).
The tests are wholly and completely skipped. [10] This will work.
SKIP: {
skip("I don't wanna die!");
die, die, die, die, die;
}
=head2 Todo tests
Thumbing through the Date::ICal man page, I came across this:
ical
$ical_string = $ical->ical;
Retrieves, or sets, the date on the object, using any
valid ICal date/time string.
"Retrieves or sets". Hmmm, didn't see a test for using C<ical()> to set
the date in the Date::ICal test suite. So I'll write one.
use Test::More tests => 1;
use Date::ICal;
my $ical = Date::ICal->new;
$ical->ical('20201231Z');
is( $ical->ical, '20201231Z', 'Setting via ical()' );
run that and I get
1..1
not ok 1 - Setting via ical()
# Failed test (- at line 6)
# got: '20010814T233649Z'
# expected: '20201231Z'
# Looks like you failed 1 tests of 1.
Whoops! Looks like it's unimplemented. Let's assume we don't have
the time to fix this. [11] Normally, you'd just comment out the test
and put a note in a todo list somewhere. Instead, we're going to
explicitly state "this test will fail" by wrapping it in a C<TODO> block.
use Test::More tests => 1;
TODO: {
local $TODO = 'ical($ical) not yet implemented';
my $ical = Date::ICal->new;
$ical->ical('20201231Z');
is( $ical->ical, '20201231Z', 'Setting via ical()' );
}
Now when you run, it's a little different:
1..1
not ok 1 - Setting via ical() # TODO ical($ical) not yet implemented
# got: '20010822T201551Z'
# expected: '20201231Z'
Test::More doesn't say "Looks like you failed 1 tests of 1". That '#
TODO' tells Test::Harness "this is supposed to fail" and it treats a
failure as a successful test. So you can write tests even before
you've fixed the underlying code.
If a TODO test passes, Test::Harness will report it "UNEXPECTEDLY
SUCCEEDED". When that happens, you simply remove the TODO block with
C<local $TODO> and turn it into a real test.
=head2 Testing with taint mode.
Taint mode is a funny thing. It's the globalest of all global
features. Once you turn it on, it affects I<all> code in your program
and I<all> modules used (and all the modules they use). If a single
piece of code isn't taint clean, the whole thing explodes. With that
in mind, it's very important to ensure your module works under taint
mode.
It's very simple to have your tests run under taint mode. Just throw
a C<-T> into the C<#!> line. Test::Harness will read the switches
in C<#!> and use them to run your tests.
#!/usr/bin/perl -Tw
...test normally here...
So when you say C<make test> it will be run with taint mode and
warnings on.
=head1 FOOTNOTES
=over 4
=item 1
The first number doesn't really mean anything, but it has to be 1.
It's the second number that's important.
=item 2
For those following along at home, I'm using version 1.31. It has
some bugs, which is good -- we'll uncover them with our tests.
=item 3
You can actually take this one step further and test the manual
itself. Have a look at B<Test::Inline> (formerly B<Pod::Tests>).
=item 4
Yes, there's a mistake in the test suite. What! Me, contrived?
=item 5
We'll get to testing the contents of lists later.
=item 6
But what happens if your test program dies halfway through?! Since we
didn't say how many tests we're going to run, how can we know it
failed? No problem, Test::More employs some magic to catch that death
and turn the test into a failure, even if every test passed up to that
point.
=item 7
I cleaned it up a little.
=item 8
Most Operating Systems record time as the number of seconds since a
certain date. This date is the beginning of the epoch. Unix's starts
at midnight January 1st, 1970 GMT.
=item 9
MacOS's epoch is midnight January 1st, 1904. VMS's is midnight,
November 17th, 1858, but vmsperl emulates the Unix epoch so it's not a
problem.
=item 10
As long as the code inside the SKIP block at least compiles. Please
don't ask how. No, it's not a filter.
=item 11
Do NOT be tempted to use TODO tests as a way to avoid fixing simple
bugs!
=back
=head1 AUTHORS
Michael G Schwern E<lt>schwern@pobox.comE<gt> and the perl-qa dancers!
=head1 COPYRIGHT
Copyright 2001 by Michael G Schwern E<lt>schwern@pobox.comE<gt>.
This documentation is free; you can redistribute it and/or modify it
under the same terms as Perl itself.
Irrespective of its distribution, all code examples in these files
are hereby placed into the public domain. You are permitted and
encouraged to use this code in your own programs for fun
or for profit as you see fit. A simple comment in the code giving
credit would be courteous but is not required.
=cut
package Test::Builder::Tester::Color;
use strict;
our $VERSION = "1.18";
require Test::Builder::Tester;
=head1 NAME
Test::Builder::Tester::Color - turn on colour in Test::Builder::Tester
=head1 SYNOPSIS
When running a test script
perl -MTest::Builder::Tester::Color test.t
=head1 DESCRIPTION
Importing this module causes the subroutine color in Test::Builder::Tester
to be called with a true value causing colour highlighting to be turned
on in debug output.
The sole purpose of this module is to enable colour highlighting
from the command line.
=cut
# Loading this module (e.g. via -MTest::Builder::Tester::Color) is all it
# takes: import() asks Test::Builder::Tester to turn colour on.  Any
# arguments given to import() are ignored.
sub import {
    return Test::Builder::Tester::color(1);
}
=head1 AUTHOR
Copyright Mark Fowler E<lt>mark@twoshortplanks.comE<gt> 2002.
This program is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
=head1 BUGS
This module will have no effect unless Term::ANSIColor is installed.
=head1 SEE ALSO
L<Test::Builder::Tester>, L<Term::ANSIColor>
=cut
1;
package Test::Builder::Tester;
use strict;
our $VERSION = "1.18";
use Test::Builder;
use Symbol;
use Carp;
=head1 NAME
Test::Builder::Tester - test testsuites that have been built with
Test::Builder
=head1 SYNOPSIS
use Test::Builder::Tester tests => 1;
use Test::More;
test_out("not ok 1 - foo");
test_fail(+1);
fail("foo");
test_test("fail works");
=head1 DESCRIPTION
A module that helps you test testing modules that are built with
B<Test::Builder>.
The testing system is designed to be used by performing a three step
process for each test you wish to test. This process starts with using
C<test_out> and C<test_err> in advance to declare what the testsuite you
are testing will output with B<Test::Builder> to stdout and stderr.
You then can run the test(s) from your test suite that call
B<Test::Builder>. At this point the output of B<Test::Builder> is
safely captured by B<Test::Builder::Tester> rather than being
interpreted as real test output.
The final stage is to call C<test_test> that will simply compare what you
predeclared to what B<Test::Builder> actually outputted, and report the
results back with a "ok" or "not ok" (with debugging) to the normal
output.
=cut
####
# set up testing
####
# File-scoped Test::Builder object.  The subs in this package (import(),
# _start_testing(), ...) use it to set the plan and to redirect test output.
my $t = Test::Builder->new;
###
# make us an exporter
###
use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(test_out test_err test_fail test_diag test_test line_num);
# _export_to_level and import stolen directly from Test::More. I am
# the king of cargo cult programming ;-)
# 5.004's Exporter doesn't have export_to_level.
# _export_to_level( $pkg, $level, $ignored, @symbols )
#
# Exports @symbols from $pkg into the package found $level frames up the
# call stack (as seen from inside this sub) by calling $pkg->export.
sub _export_to_level {
    # XXX the third argument is redundant and deliberately discarded.
    my( $pkg, $level, undef, @symbols ) = @_;

    my $callpkg = caller($level);
    $pkg->export( $callpkg, @symbols );
}
# import() - invoked by "use Test::Builder::Tester LIST".
#
# LIST is passed unchanged to the builder's plan().  If it contains an
# "import" entry, the array ref that follows it names the functions to
# export into the caller; otherwise an empty list is handed to the exporter.
sub import {
    my $class = shift;
    my(@plan) = @_;

    my $caller = caller;
    $t->exported_to($caller);
    $t->plan(@plan);

    my @imports = ();
    foreach my $idx ( 0 .. $#plan ) {
        # Check definedness first so an undef entry in the plan list does
        # not raise an "uninitialized value" warning in the comparison.
        if( defined $plan[$idx] and $plan[$idx] eq 'import' ) {
            @imports = @{ $plan[ $idx + 1 ] };
            last;    # only the first "import" entry is honoured
        }
    }

    # Level 1: export into whoever called this import().
    __PACKAGE__->_export_to_level( 1, __PACKAGE__, @imports );
}
###
# set up file handles
###
# create some private file handles
# Two anonymous globs (from Symbol::gensym) that stand in for the real
# output streams while a test is being captured.
my $output_handle = gensym;
my $error_handle = gensym;
# and tie them to this package
# $out / $err are the tie objects themselves; the exported functions call
# expect/check/complaint/reset on them directly.
my $out = tie *$output_handle, "Test::Builder::Tester::Tie", "STDOUT";
my $err = tie *$error_handle, "Test::Builder::Tester::Tie", "STDERR";
####
# exported functions
####
# for remembering that we're testing and where we're testing at
# True between the first test_out/test_err/test_fail/test_diag call and
# the matching test_test call.
my $testing = 0;
# Test::Builder's current_test value at the moment capturing started;
# restored by test_test.
my $testing_num;
# remembering where the file handles were originally connected
my $original_output_handle;
my $original_failure_handle;
my $original_todo_handle;
# NOTE(review): the next two variables are not referenced anywhere in the
# visible part of this package - confirm before removing.
my $original_test_number;
my $original_harness_state;
# Saved value of $ENV{HARNESS_ACTIVE} (or 0 if it was false) while capturing.
my $original_harness_env;
# function that starts testing and redirects the filehandles for now
# Begin a capture session: remember the current Test::Builder state, then
# point its output streams at our tied handles so that what it prints can
# be compared later by test_test.
sub _start_testing {
    # Even when running under Test::Harness, pretend we are not for now.
    # This is needed so Test::Builder doesn't add extra spaces.
    $original_harness_env = $ENV{HARNESS_ACTIVE} || 0;
    $ENV{HARNESS_ACTIVE} = 0;

    # Remember where the three streams currently go ...
    $original_output_handle  = $t->output();
    $original_failure_handle = $t->failure_output();
    $original_todo_handle    = $t->todo_output();

    # ... and redirect them to the private handles (failure and todo
    # output share the error handle).
    $t->output($output_handle);
    foreach my $setter (qw(failure_output todo_output)) {
        $t->$setter($error_handle);
    }

    # Start with empty expected/received buffers.
    foreach my $capture ( $out, $err ) {
        $capture->reset();
    }

    # Remember that we are testing, and restart numbering from zero
    # for the duration of the capture.
    $testing     = 1;
    $testing_num = $t->current_test;
    $t->current_test(0);

    # Look, we shouldn't do the ending stuff.
    $t->no_ending(1);
}
=head2 Functions
These are the six methods that are exported as default.
=over 4
=item test_out
=item test_err
Procedures for predeclaring the output that your test suite is
expected to produce until C<test_test> is called. These procedures
automatically assume that each line terminates with "\n". So
test_out("ok 1","ok 2");
is the same as
test_out("ok 1\nok 2");
which is even the same as
test_out("ok 1");
test_out("ok 2");
Once C<test_out> or C<test_err> (or C<test_fail> or C<test_diag>) have
been called once all further output from B<Test::Builder> will be
captured by B<Test::Builder::Tester>. This means that you will not
be able to perform further tests to the normal output in the normal way
until you call C<test_test> (well, unless you manually meddle with the
output filehandles)
=cut
# Declare line(s) expected on the output stream.  The first declaration
# after a test_test (or ever) also starts capturing.
sub test_out {
    $testing or _start_testing();
    $out->expect(@_);
}
# Declare line(s) expected on the error stream.  The first declaration
# after a test_test (or ever) also starts capturing.
sub test_err {
    $testing or _start_testing();
    $err->expect(@_);
}
=item test_fail
Because the standard failure message that B<Test::Builder> produces
whenever a test fails will be a common occurrence in your test error
output, and because it has changed between Test::Builder versions, rather
than forcing you to call C<test_err> with the string all the time like
so
test_err("# Failed test ($0 at line ".line_num(+1).")");
C<test_fail> exists as a convenience function that can be called
instead. It takes one argument, the offset from the current line that
the line that causes the fail is on.
test_fail(+1);
This means that the example in the synopsis could be rewritten
more simply as:
test_out("not ok 1 - foo");
test_fail(+1);
fail("foo");
test_test("fail works");
=cut
# Expect the standard "Failed test" diagnostic on the error stream.
# The optional argument is an offset added to the caller's line number
# to give the line on which the failing test will be run.
sub test_fail {
    my ($offset) = @_;

    # Start capturing if this is the first declaration.
    $testing or _start_testing();

    # Work out the line the failure should be reported at; a missing
    # offset counts as zero (this also avoids an undef warning).
    my ( undef, undef, $line ) = caller;
    $line += $offset || 0;

    $err->expect("# Failed test ($0 at line $line)");
}
=item test_diag
As most of the remaining expected output to the error stream will be
created by Test::Builder's C<diag> function, B<Test::Builder::Tester>
provides a convenience function C<test_diag> that you can use instead of
C<test_err>.
The C<test_diag> function prepends comment hashes and spacing to the
start and newlines to the end of the expected output passed to it and
adds it to the list of expected error output. So, instead of writing
test_err("# Couldn't open file");
you can write
test_diag("Couldn't open file");
Remember that B<Test::Builder>'s diag function will not add newlines to
the end of output and test_diag will. So to check
Test::Builder->new->diag("foo\n","bar\n");
You would do
test_diag("foo","bar")
without the newlines.
=cut
# Expect diagnostic line(s) on the error stream: each argument is
# declared with "# " prepended.
sub test_diag {
    # Start capturing if this is the first declaration.
    $testing or _start_testing();

    # Protect the caller's $_ from the map below.
    local $_;
    my @expected = map { "# $_" } @_;
    $err->expect(@expected);
}
=item test_test
Actually performs the output check testing the tests, comparing the
data (with C<eq>) that we have captured from B<Test::Builder> against
that that was declared with C<test_out> and C<test_err>.
This takes name/value pairs that affect how the test is run.
=over
=item title (synonym 'name', 'label')
The name of the test that will be displayed after the C<ok> or C<not
ok>.
=item skip_out
Setting this to a true value will cause the test to ignore if the
output sent by the test to the output stream does not match that
declared with C<test_out>.
=item skip_err
Setting this to a true value will cause the test to ignore if the
output sent by the test to the error stream does not match that
declared with C<test_err>.
=back
As a convenience, if only one argument is passed then this argument
is assumed to be the name of the test (as in the above examples.)
Once C<test_test> has been run test output will be redirected back to
the original filehandles that B<Test::Builder> was connected to
(probably STDOUT and STDERR,) meaning any further tests you run
will function normally and cause success/errors for B<Test::Harness>.
=cut
# Finish a capture session: restore Test::Builder's streams and test
# counter, then emit one real test that passes only if the captured
# output and error text match what was declared.
#
# Arguments: either a single test name, or name/value pairs with the keys
#   name / title / label - the test name (label beats title beats name)
#   skip_out             - true to ignore mismatches on the output stream
#   skip_err             - true to ignore mismatches on the error stream
# Croaks if no expectation has been declared since the last test_test.
sub test_test {
    # Decode the arguments as described in the pod.
    my ( $mess, %args );
    if ( @_ == 1 ) {
        ($mess) = @_;
    }
    else {
        %args = @_;
        foreach my $key (qw(name title label)) {
            $mess = $args{$key} if exists $args{$key};
        }
    }

    # Er, are we testing?
    $testing
        or croak "Not testing. You must declare output with a test function first.";

    # Reconnect the test suite to the handles saved by _start_testing.
    $t->output($original_output_handle);
    $t->failure_output($original_failure_handle);
    $t->todo_output($original_todo_handle);

    # Restore the test number and leave capture mode.
    $t->current_test($testing_num);
    $testing = 0;

    # Re-enable the original setting of the harness.
    $ENV{HARNESS_ACTIVE} = $original_harness_env;

    # Check the output we've stashed.
    unless(
        $t->ok(
            ( $args{skip_out} || $out->check ) && ( $args{skip_err} || $err->check ),
            $mess
        )
      )
    {
        # Explain, stream by stream, why the test failed.
        local $_;
        $t->diag( map { "$_\n" } $out->complaint )
          unless $args{skip_out} || $out->check;
        $t->diag( map { "$_\n" } $err->complaint )
          unless $args{skip_err} || $err->check;
    }
}
=item line_num
A utility function that returns the line number that the function was
called on. You can pass it an offset which will be added to the
result. This is very useful for working out the correct text of
diagnostic functions that contain line numbers.
Essentially this is the same as the C<__LINE__> macro, but the
C<line_num(+3)> idiom is arguably nicer.
=cut
# Return the line number this function was called from, plus an optional
# offset (a missing offset counts as zero, which also avoids a warning).
sub line_num {
    my ($offset) = @_;
    my ( undef, undef, $line ) = caller;
    return $line + ( $offset || 0 );
}
=back
In addition to the six exported functions there exists one
function that can only be accessed with a fully qualified function
call.
=over 4
=item color
When C<test_test> is called and the output that your tests generate
does not match that which you declared, C<test_test> will print out
debug information showing the two conflicting versions. As this
output itself is debug information it can be confusing which part of
the output is from C<test_test> and which was the original output from
your original tests. Also, it may be hard to spot things like
extraneous whitespace at the end of lines that may cause your test to
fail even though the output looks similar.
To assist you, if you have the B<Term::ANSIColor> module installed
(which you should do by default from perl 5.005 onwards), C<test_test>
can colour the background of the debug information to disambiguate the
different types of output. The debug output will have its background
coloured green and red. The green part represents the text which is
the same between the executed and actual output, the red shows which
part differs.
The C<color> function determines if colouring should occur or not.
Passing it a true or false value will enable or disable colouring
respectively, and the function called with no argument will return the
current setting.
To enable colouring from the command line, you can use the
B<Test::Builder::Tester::Color> module like so:
perl -MTest::Builder::Tester::Color test.t
Or by including the B<Test::Builder::Tester::Color> module directly in
the PERL5LIB.
=cut
# Current colouring flag, private to the accessor below.
my $color;

# Combined getter/setter: with an argument, store it as the new setting;
# in either case return the current setting.
sub color {
    ($color) = @_ if @_;
    return $color;
}
=back
=head1 BUGS
Calls C<< Test::Builder->no_ending >> turning off the ending tests.
This is needed as otherwise it will trip out because we've run more
tests than we strictly should have and it'll register any failures we
had that we were testing for as real failures.
The color function doesn't work unless B<Term::ANSIColor> is installed
and is compatible with your terminal.
Bugs (and requests for new features) can be reported to the author
through the CPAN RT system:
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Test-Builder-Tester>
=head1 AUTHOR
Copyright Mark Fowler E<lt>mark@twoshortplanks.comE<gt> 2002, 2004.
Some code taken from B<Test::More> and B<Test::Catch>, written by
Michael G Schwern E<lt>schwern@pobox.comE<gt>. Hence, those parts
Copyright Michael G Schwern 2001. Used and distributed with
permission.
This program is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
=head1 NOTES
This code has been tested explicitly on the following versions
of perl: 5.7.3, 5.6.1, 5.6.0, 5.005_03, 5.004_05 and 5.004.
Thanks to Richard Clamp E<lt>richardc@unixbeard.netE<gt> for letting
me use his testing system to try this module out on.
=head1 SEE ALSO
L<Test::Builder>, L<Test::Builder::Tester::Color>, L<Test::More>.
=cut
1;
####################################################################
# Helper class that is used to remember expected and received data
package Test::Builder::Tester::Tie;
##
# add line(s) to be expected
# Append line(s) to the list of expected output.  Each argument is first
# passed through _translate_Failed_check; references are stored as they
# are, anything else gets a trailing newline.
sub expect {
    my ( $self, @checks ) = @_;

    foreach my $raw (@checks) {
        my $check = $self->_translate_Failed_check($raw);
        push @{ $self->{wanted} }, ( ref $check ? $check : "$check\n" );
    }
}
# If $check is a literal "# Failed test (FILE at line N)" expectation,
# return a "/.../"-delimited pattern string that tolerates variations in
# that message; otherwise return $check unchanged.
sub _translate_Failed_check {
    my( $self, $check ) = @_;

    return $check
      unless $check =~ /\A(.*)# (Failed .*test) \((.*?) at line (\d+)\)\Z(?!\n)/;

    # $1 = text before the "#", $2 = "Failed ... test", $3 = file, $4 = line
    return "/\Q$1\E#\\s+\Q$2\E.*?\\n?.*?\Qat $3\E line \Q$4\E.*\\n?/";
}
##
# return true iff the expected data matches the got data
# Return true iff the received text consists of exactly the expected
# items, in order.  An expected item written as "/.../" is used as a
# pattern (as is a reference); anything else is matched literally.
# Works on copies, so neither the stored expectations nor the received
# text are modified.
sub check {
    my ($self) = @_;

    # turn off warnings as these might be undef
    local $^W = 0;

    my $remaining = $self->{got};
    foreach my $wanted ( @{ $self->{wanted} } ) {
        my $pattern = $wanted;

        # Strip the slashes from a "/.../" item; references are left alone.
        my $is_pattern = ( $pattern =~ s,^/(.*)/$,$1, ) || ref $pattern;
        $pattern = "\Q$pattern\E" unless $is_pattern;

        # Each item must match at the very start of what is left.
        return 0 unless $remaining =~ s/^$pattern//;
    }

    # Anything left over is unexpected output.
    return length $remaining == 0;
}
##
# a complaint message about the inputs not matching (to be
# used for debugging messages)
# Build the diagnostic text shown when the received output does not match
# the expectations, in the form
#
#   <type> is:
#   <got>
#   not:
#   <wanted>
#   as expected
#
# If Test::Builder::Tester::color() is true and Term::ANSIColor can be
# loaded, the part common to both texts is put on a green background and
# the differing tails on a red one.
sub complaint {
    my $self   = shift;
    my $type   = $self->type;
    my $got    = $self->got;
    my $wanted = join "\n", @{ $self->wanted };

    # are we running in colour mode?
    if( Test::Builder::Tester::color() ) {
        # get color
        eval { require Term::ANSIColor };
        unless($@) {
            # colours
            my $green = Term::ANSIColor::color("black") . Term::ANSIColor::color("on_green");
            my $red   = Term::ANSIColor::color("black") . Term::ANSIColor::color("on_red");
            my $reset = Term::ANSIColor::color("reset");

            # Work out where the two strings start to differ.  The scan is
            # bounded by the shorter string: without the bound, two equal
            # strings compare equal past their end forever.
            my $limit = length($got) < length($wanted) ? length($got) : length($wanted);
            my $char  = 0;
            $char++
              while $char < $limit
              && substr( $got, $char, 1 ) eq substr( $wanted, $char, 1 );

            # get the start string and the two end strings
            my $start     = $green . substr( $wanted, 0, $char );
            my $gotend    = $red . substr( $got, $char ) . $reset;
            my $wantedend = $red . substr( $wanted, $char ) . $reset;

            # make the start turn green on and off at every line break
            $start =~ s/\n/$reset\n$green/g;

            # make the ends turn red on and off at every line break
            $gotend    =~ s/\n/$reset\n$red/g;
            $wantedend =~ s/\n/$reset\n$red/g;

            # rebuild the strings
            $got    = $start . $gotend;
            $wanted = $start . $wantedend;
        }
    }

    return "$type is:\n" . "$got\nnot:\n$wanted\nas expected";
}
##
# forget all expected and got data
# Forget all expected and received data.  The object is rebuilt from
# scratch: only its type survives, any other key is dropped.
sub reset {
    my ($self) = @_;

    my $type = $self->{type};
    %$self = ( type => $type, got => '', wanted => [] );
}
# Accessor: the text received so far.
sub got {
    my ($self) = @_;
    return $self->{got};
}
# Accessor: the array reference holding the expected items.
sub wanted {
    my ($self) = @_;
    return $self->{wanted};
}
# Accessor: the stream label given when the handle was tied.
sub type {
    my ($self) = @_;
    return $self->{type};
}
###
# tie interface
###
# Tie interface: called for print on the tied handle.  The arguments are
# concatenated without a separator and appended to the received text.
sub PRINT {
    my ( $self, @chunks ) = @_;
    $self->{got} .= join '', @chunks;
}
# Tie interface: constructor.  $type is the stream label (the callers in
# this file pass "STDOUT" or "STDERR"); the buffers start out empty.
sub TIEHANDLE {
    my( $class, $type ) = @_;

    my $self = bless {}, $class;
    $self->{type} = $type;
    $self->reset;

    return $self;
}
# Input-side tie methods: the capture handles are write-only, so these
# do nothing and return nothing.
sub READ     { return }
sub READLINE { return }
sub GETC     { return }
sub FILENO   { return }
1;
package Text::Abbrev;
require 5.005; # Probably works on earlier versions too.
require Exporter;
our $VERSION = '1.01';
=head1 NAME
abbrev - create an abbreviation table from a list
=head1 SYNOPSIS
use Text::Abbrev;
abbrev $hashref, LIST
=head1 DESCRIPTION
Stores all unambiguous truncations of each element of LIST
as keys in the associative array referenced by C<$hashref>.
The values are the original list elements.
=head1 EXAMPLE
$hashref = abbrev qw(list edit send abort gripe);
%hash = abbrev qw(list edit send abort gripe);
abbrev $hashref, qw(list edit send abort gripe);
abbrev(*hash, qw(list edit send abort gripe));
=cut
@ISA = qw(Exporter);
@EXPORT = qw(abbrev);
# Usage:
# abbrev \%foo, LIST;
# ...
# $long = $foo{$short};
# Build a table mapping every unambiguous leading substring of each word
# to that word.  Full words are always entered, even when they are not
# unique.
#
# Calling forms:
#   abbrev(\%table, LIST) or abbrev(*table, LIST)
#       fill the given hash (emptied first) and return nothing
#   abbrev(LIST)
#       return the table as a hash in list context, or as a hash
#       reference in scalar context
# With no arguments at all, returns nothing.
sub abbrev {
    @_ or return;    # So we don't autovivify onto @_ and trigger warning

    my ( $hashref, $returnvoid );
    if ( ref( $_[0] ) ) {    # hash reference preferably
        $hashref    = shift;
        $returnvoid = 1;
    }
    elsif ( ref \$_[0] eq 'GLOB' ) {    # is actually a glob (deprecated)
        $hashref    = \%{ shift() };
        $returnvoid = 1;
    }
    %{$hashref} = ();

    # How many words have produced each truncation so far.
    my %times_seen;

  WORD:
    foreach my $word (@_) {
        # Try the truncations from longest to shortest.
        foreach my $len ( reverse 1 .. length($word) - 1 ) {
            my $abbrev = substr( $word, 0, $len );
            my $seen   = ++$times_seen{$abbrev};

            if ( $seen == 1 ) {
                # First word with this truncation: claim it.
                $hashref->{$abbrev} = $word;
            }
            elsif ( $seen == 2 ) {
                # Second word with it: it is ambiguous, so withdraw it.
                delete $hashref->{$abbrev};
            }
            else {
                # Third or later word: this and every shorter truncation
                # is already known to be ambiguous.
                next WORD;
            }
        }
    }

    # Non-abbreviations always get entered, even if they aren't unique
    $hashref->{$_} = $_ foreach @_;

    return if $returnvoid;
    return wantarray ? %{$hashref} : $hashref;
}
1;
# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS.
# FOR FULL DOCUMENTATION SEE Balanced.pod
use 5.005;
use strict;
package Text::Balanced;
use Exporter;
use SelfLoader;
use vars qw { $VERSION @ISA %EXPORT_TAGS };
use version; $VERSION = qv('2.0.0');
@ISA = qw ( Exporter );

%EXPORT_TAGS = ( ALL => [ qw(
&extract_delimited
&extract_bracketed
&extract_quotelike
&extract_codeblock
&extract_variable
&extract_tagged
&extract_multiple
&gen_delimited_pat
&gen_extract_tagged
&delimited_pat
) ] );
Exporter::export_ok_tags('ALL');
# PROTOTYPES
sub _match_bracketed($$$$$$);
sub _match_variable($$);
sub _match_codeblock($$$$$$$);
sub _match_quotelike($$$$);
# HANDLE RETURN VALUES IN VARIOUS CONTEXTS
# Record a failure in $@ as a Text::Balanced::ErrorMsg object carrying
# the message and the position it refers to.
sub _failmsg {
    my ($message, $pos) = @_;
    my %failure = ( error => $message, pos => $pos );
    $@ = bless \%failure, "Text::Balanced::ErrorMsg";
}
# Produce the standard failure return value for the extract_* functions.
# If $message is true it is recorded in $@ first (see _failmsg).
# Returns (undef, original text, undef) when $wantarray is true,
# otherwise undef.
sub _fail
{
    my ($wantarray, $textref, $message, $pos) = @_;

    _failmsg($message, $pos) if $message;

    return $wantarray ? (undef, $$textref, undef) : undef;
}
# Produce the standard success return value for the extract_* functions.
#
# Arguments: ($wantarray, $textref, POS/LEN pairs ...).  Judging by the
# callers in this file the pairs are, in order: match, remainder, prefix,
# then any further pieces.  When more than 18 values follow $textref, the
# last two are taken as a separate (position, length) pair, $extrapos /
# $extralen (the caller labels it "ANY FILLET?").
#
# List context ($wantarray true): returns the substring for every pair
# and sets pos() of the text to the start of the remainder.
# Scalar context: returns the match and tries to cut prefix and match out
# of the original text.
# $@ is cleared in both cases.
sub _succeed
{
    $@ = undef;
    my ($wantarray, $textref) = splice @_, 0, 2;
    my ($extrapos, $extralen) = @_ > 18 ? splice(@_, -2, 2) : (0, 0);
    my ($startlen, $oppos) = @_[5,6];
    my $remainderpos = $_[2];

    if ($wantarray)
    {
        # Turn each (position, length) pair into its substring.
        my @res;
        while (my ($from, $len) = splice @_, 0, 2)
        {
            push @res, substr($$textref, $from, $len);
        }

        if ($extralen) {        # CORRECT FILLET
            # Replace the extra span inside the match by a newline and put
            # the removed text in front of the remainder.
            my $extra = substr($res[0], $extrapos-$oppos, $extralen, "\n");
            $res[1] = "$extra$res[1]";
            # REARRANGE HERE DOC AND FILLET IF POSSIBLE
            # (the eval swallows the error if the text is not modifiable)
            eval { substr($$textref, $remainderpos, 0) = $extra;
                   substr($$textref, $extrapos, $extralen, "\n") };
            pos($$textref) = $remainderpos-$extralen+1;     # RESET \G
        }
        else {
            pos($$textref) = $remainderpos;                 # RESET \G
        }
        return @res;
    }
    else
    {
        my $match = substr($$textref, $_[0], $_[1]);
        substr($match, $extrapos-$_[0]-$startlen, $extralen, "") if $extralen;
        my $extra = $extralen
            ? substr($$textref, $extrapos, $extralen)."\n" : "";
        # CHOP OUT PREFIX & MATCH, IF POSSIBLE
        eval { substr($$textref, $_[4], $_[1]+$_[5]) = $extra };
        pos($$textref) = $_[4];                             # RESET \G
        return $match;
    }
}
# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING
# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING
#
#   $dels - string of delimiter characters; each one yields an alternative
#           matching text that opens and closes with that character
#   $escs - string of escape characters, paired with $dels by position;
#           defaults to a backslash, and its last character is repeated
#           if it is shorter than $dels
# When the delimiter is its own escape character, a doubled delimiter
# stands for a literal one inside the string.
# Returns the pattern as a string wrapped in (?:...), or "" if $dels
# contains no non-whitespace character.
sub gen_delimited_pat($;$)  # ($delimiters;$escapes)
{
    my ($dels, $escs) = @_;
    return "" unless $dels =~ /\S/;

    $escs = '\\' unless $escs;
    $escs .= substr($escs,-1) x (length($dels)-length($escs));

    my @pat = ();
    for my $i (0 .. length($dels) - 1)
    {
        my $del = quotemeta substr($dels,$i,1);
        my $esc = quotemeta substr($escs,$i,1);
        if ($del eq $esc)
        {
            push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del";
        }
        else
        {
            push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del";
        }
    }

    my $pat = join '|', @pat;
    return "(?:$pat)";
}
*delimited_pat = \&gen_delimited_pat;
# THE EXTRACTION FUNCTIONS
# Extract a leading delimited string from the text.
#
#   $_[0] - the text (defaults to $_); matching starts at its pos()
#   $_[1] - delimiter characters (default: single, double and back quote)
#   $_[2] - prefix pattern to skip first (default '\s*')
#   $_[3] - escape characters (default: backslash)
# The result is built by _succeed on a match and by _fail otherwise.
sub extract_delimited (;$$$$)
{
    my $wantarray = wantarray;
    my $textref   = defined $_[0] ? \$_[0] : \$_;

    my ($del, $pre, $esc) = @_[1..3];
    $del = qq{\'\"\`} unless defined $del;
    $pre = '\s*'      unless defined $pre;
    $esc = qq{\\}     unless defined $esc;

    my $pat      = gen_delimited_pat($del, $esc);
    my $startpos = pos($$textref) || 0;

    unless ($$textref =~ m/\G($pre)($pat)/gc)
    {
        return _fail($wantarray, $textref, "Not a delimited pattern", 0);
    }

    my $prelen   = length($1);
    my $matchpos = $startpos + $prelen;
    my $endpos   = pos($$textref);

    return _succeed($wantarray, $textref,
                    $matchpos, $endpos-$matchpos,           # MATCH
                    $endpos,   length($$textref)-$endpos,   # REMAINDER
                    $startpos, $prelen);                    # PREFIX
}
# Extract a leading balanced, bracket-delimited substring from the text.
#
#   $_[0] - the text (defaults to $_); matching starts at its pos()
#   $_[1] - delimiter specification (default '{([<').  Besides brackets
#           it may contain quote characters (' " `), which are then
#           treated as quoting inside the brackets, and the letter q,
#           which turns on recognition of quotelike constructs
#   $_[2] - prefix pattern to skip first (default '\s*')
# The result is built by _succeed on a match and by _fail otherwise.
sub extract_bracketed (;$$$)
{
    my $textref = defined $_[0] ? \$_[0] : \$_;
    my $ldel = defined $_[1] ? $_[1] : '{([<';
    my $pre = defined $_[2] ? $_[2] : '\s*';
    my $wantarray = wantarray;

    # Pull the quote characters and the 'q' flag out of the specification.
    my $qdel = "";
    my $quotelike;
    $ldel =~ s/'//g and $qdel .= q{'};
    $ldel =~ s/"//g and $qdel .= q{"};
    $ldel =~ s/`//g and $qdel .= q{`};
    $ldel =~ s/q//g and $quotelike = 1;

    # Reduce what is left to opening brackets only: closing brackets are
    # mapped to their opening form, other characters are deleted and
    # repeated brackets are squeezed.
    $ldel =~ tr/[](){}<>\0-\377/[[(({{<</ds;
    my $rdel = $ldel;
    unless ($rdel =~ tr/[({</])}>/)
    {
        return _fail($wantarray, $textref,
                     "Did not find a suitable bracket in delimiter: \"$_[1]\"",
                     0);
    }

    # pos() of $_ is saved around the map calls and put back afterwards.
    my $posbug = pos;
    $ldel = join('|', map { quotemeta $_ } split('', $ldel));
    $rdel = join('|', map { quotemeta $_ } split('', $rdel));
    pos = $posbug;

    my @match = _match_bracketed($textref, $pre, $ldel, $qdel, $quotelike, $rdel);

    return _fail($wantarray, $textref) unless @match;

    return _succeed($wantarray, $textref,
                    $match[2], $match[5]+2,     # MATCH
                    @match[8,9],                # REMAINDER
                    @match[0,1],                # PREFIX
                   );
}
# Worker for extract_bracketed.
#
#   $textref   - reference to the text; matching starts at its pos()
#   $pre       - prefix pattern
#   $ldel      - alternation of (quoted) opening brackets
#   $qdel      - string of quote characters to honour inside the brackets
#   $quotelike - true to skip over quotelike constructs as well
#   $rdel      - alternation of (quoted) closing brackets
#
# On success returns five (position, length) pairs: prefix, opening
# bracket, contents, closing bracket, remainder.  On failure records the
# reason with _failmsg and returns an empty list; pos() of the text is
# put back to where it started, except when the prefix itself fails to
# match.
sub _match_bracketed($$$$$$)    # $textref, $pre, $ldel, $qdel, $quotelike, $rdel
{
    my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_;

    # Only $startpos receives a value here; pos() is forced to be defined.
    my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0);

    unless ($$textref =~ m/\G$pre/gc)
    {
        _failmsg("Did not find prefix: /$pre/", $startpos);
        return;
    }

    $ldelpos = pos $$textref;

    unless ($$textref =~ m/\G($ldel)/gc)
    {
        _failmsg("Did not find opening bracket after prefix: \"$pre\"",
                 pos $$textref);
        pos $$textref = $startpos;
        return;
    }

    # Stack of the opening brackets still waiting to be closed.
    my @nesting = ( $1 );
    my $textlen = length $$textref;
    while (pos $$textref < $textlen)
    {
        # A backslash-escaped character never counts as a bracket.
        next if $$textref =~ m/\G\\./gcs;

        if ($$textref =~ m/\G($ldel)/gc)
        {
            push @nesting, $1;
        }
        elsif ($$textref =~ m/\G($rdel)/gc)
        {
            my ($found, $brackettype) = ($1, $1);
            if ($#nesting < 0)
            {
                _failmsg("Unmatched closing bracket: \"$found\"",
                         pos $$textref);
                pos $$textref = $startpos;
                return;
            }
            my $expected = pop(@nesting);
            $expected =~ tr/({[</)}]>/;
            if ($expected ne $brackettype)
            {
                _failmsg(qq{Mismatched closing bracket: expected "$expected" but found "$found"},
                         pos $$textref);
                pos $$textref = $startpos;
                return;
            }
            # The outermost bracket has just been closed.
            last if $#nesting < 0;
        }
        elsif ($qdel && $$textref =~ m/\G([$qdel])/gc)
        {
            # Skip a whole quoted string; $1 is the quote character.
            $$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next;
            _failmsg("Unmatched embedded quote ($1)",
                     pos $$textref);
            pos $$textref = $startpos;
            return;
        }
        elsif ($quotelike && _match_quotelike($textref,"",1,0))
        {
            next;
        }
        else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs }
    }

    if ($#nesting>=0)
    {
        _failmsg("Unmatched opening bracket(s): "
                 . join("..",@nesting)."..",
                 pos $$textref);
        pos $$textref = $startpos;
        return;
    }

    $endpos = pos $$textref;

    return (
        $startpos,  $ldelpos-$startpos,         # PREFIX
        $ldelpos,   1,                          # OPENING BRACKET
        $ldelpos+1, $endpos-$ldelpos-2,         # CONTENTS
        $endpos-1,  1,                          # CLOSING BRACKET
        $endpos,    length($$textref)-$endpos,  # REMAINDER
    );
}
# Return the closing counterpart of a run of opening brackets: the string
# is reversed and each of [ ( { < is replaced by ] ) } >.
sub _revbracket($)
{
    ( my $closing = reverse $_[0] ) =~ tr/[({</])}>/;
    return $closing;
}
my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*};
# Extract a leading substring enclosed in matching opening and closing
# tags.
#
#   $_[0] - the text (defaults to $_); matching starts at its pos()
#   $_[1] - pattern for the opening tag (default: an XML-like <tag ...>)
#   $_[2] - closing tag; when undefined, _match_tagged derives it from
#           the opening tag that was actually matched
#   $_[3] - prefix pattern to skip first (default '\s*')
#   $_[4] - hash reference of options:
#             reject - pattern, or array ref of patterns, that must not
#                      occur inside the tagged text
#             ignore - pattern, or array ref of patterns, to skip over
#             fail   - failure mode handed to _match_tagged
#                      (it tests for 'MAX' and 'PARA')
# The result is built by _succeed on a match and by _fail otherwise.
sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options)
{
    my $textref = defined $_[0] ? \$_[0] : \$_;
    my $ldel    = $_[1];
    my $rdel    = $_[2];
    my $pre     = defined $_[3] ? $_[3] : '\s*';
    my %options = defined $_[4] ? %{$_[4]} : ();
    my $omode   = defined $options{fail} ? $options{fail} : '';

    # Array references are turned into an alternation.
    my $bad     = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
                : defined($options{reject})        ? $options{reject}
                :                                    ''
                ;
    my $ignore  = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
                : defined($options{ignore})        ? $options{ignore}
                :                                    ''
                ;

    if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
    $@ = undef;

    my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);

    return _fail(wantarray, $textref) unless @match;

    return _succeed(wantarray, $textref,
                    $match[2], $match[3]+$match[5]+$match[7],   # MATCH
                    @match[8..9,0..1,2..7]);                    # REM, PRE, BITS
}
# Worker for extract_tagged; calls itself for nested tags.
#
#   $textref - reference to the text; matching starts at its pos()
#   $pre     - prefix pattern
#   $ldel    - pattern for the opening tag
#   $rdel    - closing tag, or undef to derive it from the opening tag
#   $omode   - failure mode: 'MAX' accepts the text up to the point of
#              failure, 'PARA' accepts the text up to the first blank
#              line (or the point of failure), anything else fails
#   $bad     - pattern that must not occur inside the tagged text ('' = none)
#   $ignore  - pattern to skip over ('' = none)
#
# On success returns five (position, length) pairs: prefix, opening tag,
# text, closing tag, remainder.  On failure records the reason with
# _failmsg (unless $@ is already set), restores pos() and returns an
# empty list.
sub _match_tagged       # ($$$$$$$)
{
    my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_;
    my $rdelspec;

    # Only $startpos receives a value here; pos() is forced to be defined.
    my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) =
        ( pos($$textref) = pos($$textref)||0 );

    unless ($$textref =~ m/\G($pre)/gc)
    {
        _failmsg("Did not find prefix: /$pre/", pos $$textref);
        goto failed;
    }

    $opentagpos = pos($$textref);

    unless ($$textref =~ m/\G$ldel/gc)
    {
        _failmsg("Did not find opening tag: /$ldel/", pos $$textref);
        goto failed;
    }

    $textpos = pos($$textref);

    if (!defined $rdel)
    {
        # Derive the closing tag from the opening tag just matched:
        # leading brackets + name becomes brackets + "/" + name +
        # reversed brackets.
        $rdelspec = substr($$textref, $-[0], $+[0] - $-[0]);
        unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". _revbracket($1) /oes)
        {
            # $rdel is undefined on this path, so report the opening tag
            # text (still unchanged in $rdelspec) instead.
            _failmsg("Unable to construct closing tag to match: $rdelspec",
                     pos $$textref);
            goto failed;
        }
    }
    else
    {
        # NOTE(review): the closing-tag specification is interpolated with
        # a string eval, so it must never come from untrusted input.
        $rdelspec = eval "qq{$rdel}" || do {
            # Retry with the first delimiter that does not occur in $rdel.
            my $del;
            for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',)
                { next if $rdel =~ /\Q$_/; $del = $_; last }
            unless ($del) {
                use Carp;
                croak "Can't interpolate right delimiter $rdel"
            }
            eval "qq$del$rdel$del";
        };
    }

    while (pos($$textref) < length($$textref))
    {
        # Skip backslash-escaped characters.
        next if $$textref =~ m/\G\\./gc;

        if ($$textref =~ m/\G(\n[ \t]*\n)/gc )
        {
            # Remember where the first blank line starts (for 'PARA').
            $parapos = pos($$textref) - length($1)
                unless defined $parapos;
        }
        elsif ($$textref =~ m/\G($rdelspec)/gc )
        {
            $closetagpos = pos($$textref)-length($1);
            goto matched;
        }
        elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc)
        {
            next;
        }
        elsif ($bad && $$textref =~ m/\G($bad)/gcs)
        {
            pos($$textref) -= length($1);   # CUT OFF WHATEVER CAUSED THE SHORTNESS
            goto short if ($omode eq 'PARA' || $omode eq 'MAX');
            _failmsg("Found invalid nested tag: $1", pos $$textref);
            goto failed;
        }
        elsif ($$textref =~ m/\G($ldel)/gc)
        {
            my $tag = $1;
            pos($$textref) -= length($tag);     # REWIND TO NESTED TAG
            unless (_match_tagged(@_))          # MATCH NESTED TAG
            {
                goto short if $omode eq 'PARA' || $omode eq 'MAX';
                _failmsg("Found unbalanced nested tag: $tag",
                         pos $$textref);
                goto failed;
            }
        }
        else { $$textref =~ m/./gcs }
    }

short:
    # No closing tag was found: accept a shortened match if the failure
    # mode allows it.
    $closetagpos = pos($$textref);
    goto matched if $omode eq 'MAX';
    goto failed unless $omode eq 'PARA';

    if (defined $parapos) { pos($$textref) = $parapos }
    else                  { $parapos = pos($$textref) }

    return (
        $startpos,   $opentagpos-$startpos,         # PREFIX
        $opentagpos, $textpos-$opentagpos,          # OPENING TAG
        $textpos,    $parapos-$textpos,             # TEXT
        $parapos,    0,                             # NO CLOSING TAG
        $parapos,    length($$textref)-$parapos,    # REMAINDER
    );

matched:
    $endpos = pos($$textref);
    return (
        $startpos,    $opentagpos-$startpos,        # PREFIX
        $opentagpos,  $textpos-$opentagpos,         # OPENING TAG
        $textpos,     $closetagpos-$textpos,        # TEXT
        $closetagpos, $endpos-$closetagpos,         # CLOSING TAG
        $endpos,      length($$textref)-$endpos,    # REMAINDER
    );

failed:
    _failmsg("Did not find closing tag", pos $$textref) unless $@;
    pos($$textref) = $startpos;
    return;
}
# Extract a leading Perl variable expression from the text.
#
#   $_[0] - the text (defaults to $_); matching starts at its pos()
#   $_[1] - prefix pattern to skip first (default '\s*')
# Returns three empty strings if the text is undefined; otherwise the
# result is built by _succeed on a match and by _fail on failure.
sub extract_variable (;$$)
{
    my $textref = defined $_[0] ? \$_[0] : \$_;
    return ("","","") unless defined $$textref;
    my $pre = defined $_[1] ? $_[1] : '\s*';

    my @match = _match_variable($textref, $pre);

    return _fail(wantarray, $textref) unless @match;

    return _succeed(wantarray, $textref,
                    @match[2..3,4..5,0..1]);    # MATCH, REMAINDER, PREFIX
}
# Worker for extract_variable.
#
#   $textref - reference to the text; matching starts at its pos()
#   $pre     - prefix pattern
#
# Matches either a punctuation/numeric variable, or one or more
# dereferencers followed by an identifier or a {...} block, and then any
# run of trailing subscripts, method calls and dereferences.
#
# On success returns three (position, length) pairs: prefix, variable,
# remainder.  On failure records the reason with _failmsg and returns an
# empty list; pos() of the text is put back to where it started, except
# when the prefix itself fails to match.
sub _match_variable($$)
{
    #  $#
    #  $^
    #  $$
    my ($textref, $pre) = @_;
    my $startpos = pos($$textref) = pos($$textref)||0;

    unless ($$textref =~ m/\G($pre)/gc)
    {
        _failmsg("Did not find prefix: /$pre/", pos $$textref);
        return;
    }

    my $varpos = pos($$textref);

    # First try the special variables: $1, $&, $/, $^W and so on.
    unless ($$textref =~ m{\G\$\s*(?!::)(\d+|[][&`'+*./|,";%=~:?!\@<>()-]|\^[a-z]?)}gci)
    {
        unless ($$textref =~ m/\G((\$#?|[*\@\%]|\\&)+)/gc)
        {
            _failmsg("Did not find leading dereferencer", pos $$textref);
            pos $$textref = $startpos;
            return;
        }
        my $deref = $1;

        # After the sigils: a (possibly package-qualified) name, a block,
        # or nothing at all for a bare $# or $$.
        unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci
            or _match_codeblock($textref, "", '\{', '\}', '\{', '\}', 0)
            or $deref eq '$#' or $deref eq '$$' )
        {
            _failmsg("Bad identifier after dereferencer", pos $$textref);
            pos $$textref = $startpos;
            return;
        }
    }

    # Consume whatever may follow the variable, until nothing matches.
    while (1)
    {
        next if $$textref =~ m/\G\s*(?:->)?\s*[{]\w+[}]/gc;
        next if _match_codeblock($textref,
                                 qr/\s*->\s*(?:[_a-zA-Z]\w+\s*)?/,
                                 qr/[({[]/, qr/[)}\]]/,
                                 qr/[({[]/, qr/[)}\]]/, 0);
        next if _match_codeblock($textref,
                                 qr/\s*/, qr/[{[]/, qr/[}\]]/,
                                 qr/[{[]/, qr/[}\]]/, 0);
        next if _match_variable($textref,'\s*->\s*');
        next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc;
        last;
    }

    my $endpos = pos($$textref);
    return ($startpos, $varpos-$startpos,
            $varpos,   $endpos-$varpos,
            $endpos,   length($$textref)-$endpos
           );
}
# Extract a leading bracket-delimited block of Perl-like code.
#
#   $_[0] - the text (defaults to $_); matching starts at its pos()
#   $_[1] - brackets that delimit nested blocks (default '{')
#   $_[2] - prefix pattern to skip first (default '\s*')
#   $_[3] - brackets that may delimit the outermost block
#           (default: the same as $_[1])
#   $_[4] - flag passed through unchanged to _match_codeblock
# The result is built by _succeed on a match and by _fail otherwise.
sub extract_codeblock (;$$$$$)
{
    my $wantarray  = wantarray;
    my $textref    = defined $_[0] ? \$_[0] : \$_;
    my $ldel_inner = defined $_[1] ? $_[1] : '{';
    my $pre        = defined $_[2] ? $_[2] : '\s*';
    my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner;
    my $rd         = $_[4];

    # The closing sets start out as copies of the opening specifications.
    my ($rdel_inner, $rdel_outer) = ($ldel_inner, $ldel_outer);

    # pos() of $_ is saved here and put back after the delimiter work.
    my $posbug = pos;

    # Reduce each specification to brackets only: opening forms on the
    # left-hand side, closing forms on the right-hand side.
    foreach my $opening ($ldel_inner, $ldel_outer) {
        $opening =~ tr/[]()<>{}\0-\377/[[((<<{{/ds;
    }
    foreach my $closing ($rdel_inner, $rdel_outer) {
        $closing =~ tr/[]()<>{}\0-\377/]]))>>}}/ds;
    }

    # Turn each bracket set into a capturing alternation.
    foreach my $set ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer) {
        $set = '(' . join('|', map { quotemeta $_ } split('', $set)) . ')';
    }

    pos = $posbug;

    my @match = _match_codeblock($textref, $pre,
                                 $ldel_outer, $rdel_outer,
                                 $ldel_inner, $rdel_inner,
                                 $rd);

    return _fail($wantarray, $textref) unless @match;

    return _succeed($wantarray, $textref,
                    @match[2..3,4..5,0..1]  # MATCH, REMAINDER, PREFIX
                   );
}
# Worker for extract_codeblock (also used by _match_variable, and calls
# itself for nested blocks).
#
# $textref - reference to the text; matching starts at its pos()
# $pre - prefix pattern
# $ldel_outer, $rdel_outer - patterns for the outermost opening and
# closing bracket
# $ldel_inner, $rdel_inner - patterns for the brackets of nested blocks
# $rd - when true, the literal sequences (?), (s?) and (s) are skipped
#
# On success returns three (position, length) pairs: prefix, code block,
# remainder. On failure records the reason with _failmsg and returns an
# empty list.
sub _match_codeblock($$$$$$$)
{
my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner,
$rd) = @_;
# Force pos() to be defined and remember where we started.
my $startpos = pos($$textref) = pos($$textref) || 0;
unless ($$textref =~ m/\G($pre)/gc)
{
_failmsg qq{Did not match prefix /$pre/ at"} .
substr($$textref,pos($$textref),20) .
q{..."},
pos $$textref;
return;
}
my $codepos = pos($$textref);
unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER
{
_failmsg qq{Did not find expected opening bracket at "} .
substr($$textref,pos($$textref),20) .
q{..."},
pos $$textref;
pos $$textref = $startpos;
return;
}
# The bracket that has to close this block.
my $closing = $1;
$closing =~ tr/([<{/)]>}/;
my $matched;
# $patvalid is handed to _match_quotelike as both of its flags that allow
# a bare /.../ or ?...? to count as a quotelike. It is set after an
# operator, an opening ( or [, or a nested block, and cleared after a
# variable, a quotelike, or any other token.
my $patvalid = 1;
while (pos($$textref) < length($$textref))
{
$matched = '';
if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc)
{
$patvalid = 0;
next;
}
# Skip comments.
if ($$textref =~ m/\G\s*#.*/gc)
{
next;
}
if ($$textref =~ m/\G\s*($rdel_outer)/gc)
{
unless ($matched = ($closing && $1 eq $closing) )
{
next if $1 eq '>'; # MIGHT BE A "LESS THAN"
_failmsg q{Mismatched closing bracket at "} .
substr($$textref,pos($$textref),20)
.
qq{...". Expected '$closing'},
pos $$textref;
}
# Leave the loop on the correct closing bracket, and also on a wrong one.
last;
}
if (_match_variable($textref,'\s*') ||
_match_quotelike($textref,'\s*',$patvalid,$patvalid) )
{
$patvalid = 0;
next;
}
# NEED TO COVER MANY MORE CASES HERE!!!
# Operators, a few list keywords, and an opening ( or [.
if ($$textref =~ m#\G\s*(?!$ldel_inner)
( [-+*x/%^&|.]=?
| [!=]~
| =(?!>)
| (\*\*|&&|\|\||<<|>>)=?
| split|grep|map|return
| [([]
)#gcx)
{
$patvalid = 1;
next;
}
# A nested block: inner delimiters are used at every level below this one.
if ( _match_codeblock($textref, '\s*', $ldel_inner, $rdel_inner,
$ldel_inner, $rdel_inner, $rd) )
{
$patvalid = 1;
next;
}
if ($$textref =~ m/\G\s*$ldel_outer/gc)
{
_failmsg q{Improperly nested codeblock at "} .
substr($$textref,pos($$textref),20) .
q{..."},
pos $$textref;
last;
}
# Anything else: consume one token and carry on.
$patvalid = 0;
$$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc;
}
# Runs at the end of every iteration that is not ended by "last", so an
# error recorded just before "last" survives in $@.
continue { $@ = undef }
unless ($matched)
{
_failmsg 'No match found for opening bracket', pos $$textref
unless $@;
# NOTE(review): pos() is not put back to $startpos on this path, unlike
# the opening-bracket failure above - confirm whether callers rely on it.
return;
}
my $endpos = pos($$textref);
return ( $startpos, $codepos-$startpos,
$codepos, $endpos-$codepos,
$endpos, length($$textref)-$endpos,
);
}
# Trailing modifier letters, as a regex fragment, keyed by quotelike
# operator name. The 'none' entry is used by _match_quotelike for a bare
# /.../ or ?...? match; an empty string means no modifiers are matched.
# NOTE(review): the other keys are presumably looked up by operator name
# in the part of _match_quotelike beyond this chunk - confirm.
my %mods = (
'none' => '[cgimsox]*',
'm' => '[cgimsox]*',
's' => '[cegimsox]*',
'tr' => '[cds]*',
'y' => '[cds]*',
'qq' => '',
'qx' => '',
'qw' => '',
'qr' => '[imsx]*',
'q' => '',
);
# Extract a leading quote or quotelike operator from the text.
#
#   $_[0] - the text (defaults to $_ when undefined); matching starts at
#           its pos()
#   $_[1] - prefix pattern to skip first (default '\s*')
# The result is built by _succeed on a match and by _fail otherwise.
sub extract_quotelike (;$$)
{
    # Test for definedness, as the other extract_* functions do: a plain
    # truth test would silently switch to $_ for the texts "0" and "".
    my $textref = defined $_[0] ? \$_[0] : \$_;
    my $wantarray = wantarray;
    my $pre = defined $_[1] ? $_[1] : '\s*';

    # A bare /.../ is accepted as a match here, a bare ?...? is not.
    my @match = _match_quotelike($textref, $pre, 1, 0);

    return _fail($wantarray, $textref) unless @match;

    return _succeed($wantarray, $textref,
                    $match[2], $match[18]-$match[2],    # MATCH
                    @match[18,19],                      # REMAINDER
                    @match[0,1],                        # PREFIX
                    @match[2..17],                      # THE BITS
                    @match[20,21],                      # ANY FILLET?
                   );
}
# _match_quotelike($textref, $prepat, $allow_raw_match, $allow_qmark_match)
#
# Try to match a quote or quotelike operation at the current pos() of
# $$textref, after skipping the prefix pattern $prepat.
#
#   $textref   - reference to the text being scanned; its pos() is advanced
#                on success and reset to the starting position on most
#                failures
#   $pre       - prefix pattern, matched with \G before the quotelike
#   $rawmatch  - true if a bare /.../ may be treated as a pattern match
#   $qmark     - true if a bare ?...? may be treated as a pattern match
#
# Returns an empty list on failure (after recording a message via _failmsg
# for most failure modes). On success returns a flat list of
# (position, length) pairs into $$textref, in this order:
#   prefix, operator, left delimiter, first string, right delimiter,
#   second left delimiter, second string, second right delimiter,
#   modifiers, remainder
# plus, for here documents only, an eleventh pair describing the rest of
# the line that follows the <<LABEL token (the "filleted" text).
sub _match_quotelike($$$$)      # ($textref, $prepat, $allow_raw_match, $allow_qmark_match)
{
    my ($textref, $pre, $rawmatch, $qmark) = @_;

    my $textlen = length($$textref);
    pos($$textref) ||= 0;               # an undefined pos() means "start of text"
    my $startpos = pos($$textref);

    my ($oppos,
        $ld1pos, $str1pos, $rd1pos,
        $ld2pos, $str2pos, $rd2pos,
        $modpos);

    unless ($$textref =~ m/\G($pre)/gc)
    {
        _failmsg(qq{Did not find prefix /$pre/ at "} .
                     substr($$textref, pos($$textref), 20) .
                     q{..."},
                 pos($$textref));
        return;
    }
    $oppos = pos($$textref);

    my $initial = substr($$textref, $oppos, 1);

    # --- Plain quotes ("...", '...', `...`) and, when permitted, bare
    # --- /.../ and ?...? pattern matches: no operator keyword involved.
    if ($initial && $initial =~ m|^[\"\'\`]|
     || $rawmatch && $initial =~ m|^/|
     || $qmark && $initial =~ m|^\?|)
    {
        # pos() already sits on the opening delimiter, so anchoring with \G
        # selects the same match and avoids rescanning the text on failure.
        unless ($$textref =~ m/ \G \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcsx)
        {
            _failmsg(qq{Did not find closing delimiter to match '$initial' at "} .
                         substr($$textref, $oppos, 20) .
                         q{..."},
                     pos($$textref));
            pos($$textref) = $startpos;
            return;
        }
        $modpos = pos($$textref);
        $rd1pos = $modpos-1;

        # Only pattern matches can carry trailing modifiers.
        if ($initial eq '/' || $initial eq '?')
        {
            $$textref =~ m/\G$mods{none}/gc
        }

        my $endpos = pos($$textref);
        return (
            $startpos,  $oppos-$startpos,       # PREFIX
            $oppos,     0,                      # NO OPERATOR
            $oppos,     1,                      # LEFT DEL
            $oppos+1,   $rd1pos-$oppos-1,       # STR/PAT
            $rd1pos,    1,                      # RIGHT DEL
            $modpos,    0,                      # NO 2ND LDEL
            $modpos,    0,                      # NO 2ND STR
            $modpos,    0,                      # NO 2ND RDEL
            $modpos,    $endpos-$modpos,        # MODIFIERS
            $endpos,    $textlen-$endpos,       # REMAINDER
        );
    }

    # --- Otherwise a quotelike operator keyword (or <<) is required.
    unless ($$textref =~ m{\G(\b(?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<)}gc)
    {
        _failmsg(q{No quotelike operator found after prefix at "} .
                     substr($$textref, pos($$textref), 20) .
                     q{..."},
                 pos($$textref));
        pos($$textref) = $startpos;
        return;
    }

    my $op = $1;

    # --- Here document: <<LABEL, <<'LABEL', <<"LABEL", <<`LABEL` or bare <<
    if ($op eq '<<') {
        $ld1pos = pos($$textref);
        my $label;
        if ($$textref =~ m{\G([A-Za-z_]\w*)}gc) {
            $label = $1;
        }
        elsif ($$textref =~ m{ \G ' ([^'\\]* (?:\\.[^'\\]*)*) '
                             | \G " ([^"\\]* (?:\\.[^"\\]*)*) "
                             | \G ` ([^`\\]* (?:\\.[^`\\]*)*) `
                             }gcsx) {
            $label = $+;
        }
        else {
            $label = "";
        }
        my $extrapos = pos($$textref);

        # Skip the rest of the introducing line; the document body starts on
        # the next line. pos() is then stepped back one character (onto the
        # newline) so that an empty body can still satisfy the search below.
        $$textref =~ m{.*\n}gc;
        $str1pos = pos($$textref)--;

        unless ($$textref =~ m{.*?\n(?=\Q$label\E\n)}gc) {
            _failmsg(qq{Missing here doc terminator ('$label') after "} .
                         substr($$textref, $startpos, 20) .
                         q{..."},
                     pos($$textref));
            pos($$textref) = $startpos;
            return;
        }
        $rd1pos = pos($$textref);
        $$textref =~ m{\Q$label\E\n}gc;
        $ld2pos = pos($$textref);
        return (
            $startpos,  $oppos-$startpos,       # PREFIX
            $oppos,     length($op),            # OPERATOR
            $ld1pos,    $extrapos-$ld1pos,      # LEFT DEL
            $str1pos,   $rd1pos-$str1pos,       # STR/PAT
            $rd1pos,    $ld2pos-$rd1pos,        # RIGHT DEL
            $ld2pos,    0,                      # NO 2ND LDEL
            $ld2pos,    0,                      # NO 2ND STR
            $ld2pos,    0,                      # NO 2ND RDEL
            $ld2pos,    0,                      # NO MODIFIERS
            $ld2pos,    $textlen-$ld2pos,       # REMAINDER
            $extrapos,  $str1pos-$extrapos,     # FILLETED BIT
        );
    }

    # --- First delimited block of the operator.
    $$textref =~ m/\G\s*/gc;
    $ld1pos  = pos($$textref);
    $str1pos = $ld1pos+1;

    unless ($$textref =~ m/\G(\S)/gc)   # SHOULD USE LOOKAHEAD
    {
        _failmsg("No block delimiter found after quotelike $op",
                 pos($$textref));
        pos($$textref) = $startpos;
        return;
    }
    pos($$textref) = $ld1pos;   # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN
    my ($ldel1, $rdel1) = ("\Q$1","\Q$1");
    if ($ldel1 =~ /[[(<{]/)
    {
        # Bracketing delimiter: the closer is the mirror-image bracket and
        # nesting is delegated to _match_bracketed.
        $rdel1 =~ tr/[({</])}>/;
        defined(_match_bracketed($textref,"",$ldel1,"","",$rdel1))
            || do { pos($$textref) = $startpos; return };
        $ld2pos = pos($$textref);
        $rd1pos = $ld2pos-1;
    }
    else
    {
        $$textref =~ /\G$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gcs
            || do { pos($$textref) = $startpos; return };
        # With a non-bracketing delimiter the closer of the first block is
        # also the opener of the second block (as in s/a/b/).
        $ld2pos = $rd1pos = pos($$textref)-1;
    }

    # --- Second delimited block (s, tr and y only).
    # Anchored so the test reads as "is exactly one of these operators".
    my $second_arg = $op =~ /\A(?:s|tr|y)\z/ ? 1 : 0;
    if ($second_arg)
    {
        my ($ldel2, $rdel2);
        if ($ldel1 =~ /[[(<{]/)
        {
            unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD
            {
                _failmsg("Missing second block for quotelike $op",
                         pos($$textref));
                pos($$textref) = $startpos;
                return;
            }
            $ldel2 = $rdel2 = "\Q$1";
            $rdel2 =~ tr/[({</])}>/;

            # Whitespace may separate the two blocks (s{a} {b}); report the
            # second left delimiter where it really is, not at the first
            # character after the first block.
            $ld2pos = pos($$textref)-1;
        }
        else
        {
            $ldel2 = $rdel2 = $ldel1;
        }
        $str2pos = $ld2pos+1;

        if ($ldel2 =~ /[[(<{]/)
        {
            pos($$textref)--;   # OVERCOME BROKEN LOOKAHEAD
            defined(_match_bracketed($textref,"",$ldel2,"","",$rdel2))
                || do { pos($$textref) = $startpos; return };
        }
        else
        {
            # \G keeps the match at the start of the second block. Without
            # it an unterminated block such as the one in  s/x/a\/  would be
            # accepted by restarting the match at the escaped delimiter.
            $$textref =~ /\G[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gcs
                || do { pos($$textref) = $startpos; return };
        }
        $rd2pos = pos($$textref)-1;
    }
    else
    {
        $ld2pos = $str2pos = $rd2pos = $rd1pos;
    }

    # --- Trailing modifiers permitted for this operator.
    $modpos = pos($$textref);

    $$textref =~ m/\G($mods{$op})/gc;
    my $endpos = pos($$textref);

    return (
        $startpos,  $oppos-$startpos,       # PREFIX
        $oppos,     length($op),            # OPERATOR
        $ld1pos,    1,                      # LEFT DEL
        $str1pos,   $rd1pos-$str1pos,       # STR/PAT
        $rd1pos,    1,                      # RIGHT DEL
        $ld2pos,    $second_arg,            # 2ND LDEL (MAYBE)
        $str2pos,   $rd2pos-$str2pos,       # 2ND STR (MAYBE)
        $rd2pos,    $second_arg,            # 2ND RDEL (MAYBE)
        $modpos,    $endpos-$modpos,        # MODIFIERS
        $endpos,    $textlen-$endpos,       # REMAINDER
    );
}
# Default extractors for extract_multiple, used when the caller supplies no
# list of functions. Each is called with the text as its only argument and
# tried in this order: variable, quotelike, then a {}-delimited code block.
# All three pass an empty prefix pattern.
my $def_func =
[
sub { extract_variable($_[0], '') },
sub { extract_quotelike($_[0],'') },
sub { extract_codeblock($_[0],'{}','') },
];
# extract_multiple($text, $functions_ref, $max_fields, $ignoreunknown)
#
# Repeatedly apply a list of extractors at the current pos() of $text and
# collect the fields they produce.
#
#   $text           - string to scan ($_ if undef); aliased, not copied
#   $functions_ref  - array ref of extractors (defaults to $def_func). Each
#                     entry may be
#                       * a code ref, called in list context with the text;
#                         it returns (field, remainder, prefix)
#                       * a Text::Balanced::Extractor object
#                       * anything else, which is interpolated as a pattern
#                         and matched at \G; the field is $1 if the pattern
#                         captured, otherwise the whole match
#                       * a one-pair hash ref { Class => extractor }, whose
#                         resulting field is returned as a reference to the
#                         field blessed into Class
#   $max_fields     - maximum number of fields to return (forced to 1 in
#                     scalar context)
#   $ignoreunknown  - if true, text matched by no extractor is dropped
#                     instead of being returned as a field of its own
#
# In list context returns the fields and leaves pos($text) after the last
# one. In scalar context returns the first field only and removes the
# scanned portion from $text.
sub extract_multiple (;$$$$)    # ($text, $functions_ref, $max_fields, $ignoreunknown)
{
    my $textref = defined($_[0]) ? \$_[0] : \$_;
    my ($lastpos, $firstpos);
    my @fields = ();

    my @func  = defined $_[1] ? @{$_[1]} : @{$def_func};
    my $max   = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000;
    my $igunk = $_[3];

    pos $$textref ||= 0;

    unless (wantarray)
    {
        use Carp;
        carp "extract_multiple reset maximal count to 1 in scalar context"
            if $^W && defined($_[2]) && $max > 1;
        $max = 1
    }

    # Unwrap { Class => extractor } entries: remember the class name (undef
    # for plain entries) and replace the entry in our private copy of the
    # list with the extractor itself.
    my @class;
    foreach my $entry ( @func )
    {
        if (ref($entry) eq 'HASH')
        {
            push @class, (keys %$entry)[0];
            $entry = (values %$entry)[0];
        }
        else
        {
            push @class, undef;
        }
    }

    my $unkpos;     # start of the current run of unrecognised text, if any

    FIELD: while (pos($$textref) < length($$textref))
    {
        # Fresh lexical per iteration, so blessed fields reference distinct
        # scalars.
        my $field;
        foreach my $i ( 0..$#func )
        {
            my $pref;
            my $func  = $func[$i];
            my $class = $class[$i];
            $lastpos = pos $$textref;

            if (ref($func) eq 'CODE')
            {
                # List context: (field, remainder, prefix).
                ($field, undef, $pref) = $func->($$textref);
            }
            elsif (ref($func) eq 'Text::Balanced::Extractor')
            {
                # NOTE(review): the extractor is invoked in scalar context
                # here, unlike the code-ref case above -- confirm that this
                # is intended.
                $field = $func->extract($$textref);
            }
            elsif ( $$textref =~ m/\G$func/gc )
            {
                $field = defined($1)
                            ? $1
                            : substr($$textref, $-[0], $+[0] - $-[0]);
            }
            $pref ||= "";

            if (defined($field) && length($field))
            {
                if (!$igunk) {
                    # A non-empty prefix counts as unrecognised text that
                    # starts where this extractor began.
                    $unkpos = $lastpos
                        if length($pref) && !defined($unkpos);
                    if (defined $unkpos)
                    {
                        push @fields, substr($$textref, $unkpos, $lastpos-$unkpos).$pref;
                        $firstpos = $unkpos unless defined $firstpos;
                        undef $unkpos;
                        last FIELD if @fields == $max;
                    }
                }
                push @fields, $class
                    ? bless (\$field, $class)
                    : $field;
                $firstpos = $lastpos unless defined $firstpos;
                $lastpos = pos $$textref;
                last FIELD if @fields == $max;
                next FIELD;
            }
        }

        # No extractor matched here: step over one character and remember
        # where the unrecognised run began.
        if ($$textref =~ /\G(.)/gcs)
        {
            $unkpos = pos($$textref)-1
                unless $igunk || defined $unkpos;
        }
    }

    # Trailing unrecognised text becomes a final field.
    if (defined $unkpos)
    {
        push @fields, substr($$textref, $unkpos);
        $firstpos = $unkpos unless defined $firstpos;
        $lastpos = length $$textref;
    }

    pos $$textref = $lastpos;
    return @fields if wantarray;

    # Scalar context: cut the scanned portion out of the text. The eval
    # guards against a text that cannot be modified.
    $firstpos ||= 0;
    eval { substr($$textref,$firstpos,$lastpos-$firstpos)="";
           pos $$textref = $firstpos };
    return $fields[0];
}
# gen_extract_tagged($opentag, $closetag, $pre, \%options)
#
# Build a reusable tagged-text extractor with its patterns compiled once.
#
#   $opentag   - pattern for the opening tag; if undef, a default pattern is
#                used that starts with '<' and a word, runs to the next '>'
#                and allows quoted strings inside the tag
#   $closetag  - pattern for the closing tag (passed through unchanged)
#   $pre       - prefix pattern to skip (default '\s*')
#   \%options  - optional hash ref:
#                  fail    => failure mode passed on to the matcher
#                  reject  => pattern, or array ref of patterns joined by '|'
#                  ignore  => pattern, or array ref of patterns joined by '|'
#
# Returns a closure blessed into Text::Balanced::Extractor. The closure
# takes the text to extract from ($_ if undef), calls
# Text::Balanced::_match_tagged with the settings captured here, and
# reports the outcome through _succeed / _fail.
sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options)
{
    my $ldel    = $_[0];
    my $rdel    = $_[1];
    my $pre     = defined $_[2] ? $_[2] : '\s*';
    my %options = defined $_[3] ? %{$_[3]} : ();
    my $omode   = defined $options{fail} ? $options{fail} : '';
    my $bad     = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
                : defined($options{reject})        ? $options{reject}
                :                                    ''
                ;
    my $ignore  = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
                : defined($options{ignore})        ? $options{ignore}
                :                                    ''
                ;

    if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }

    # Pre-compile the non-empty patterns. pos() of $_ is saved beforehand
    # and restored afterwards, so the caller's match position in $_ is the
    # same on return as on entry.
    my $posbug = pos;
    for ($ldel, $pre, $bad, $ignore) { $_ = qr/$_/ if $_ }
    pos = $posbug;

    my $closure = sub
    {
        my $textref = defined $_[0] ? \$_[0] : \$_;
        my @match = Text::Balanced::_match_tagged($textref, $pre, $ldel,
                                                  $rdel, $omode, $bad, $ignore);

        return _fail(wantarray, $textref) unless @match;
        return _succeed(wantarray, $textref,
                        $match[2], $match[3]+$match[5]+$match[7],   # MATCH
                        @match[8..9,0..1,2..7]);                    # REM, PRE, BITS
    };

    return bless $closure, 'Text::Balanced::Extractor';
}
package Text::Balanced::Extractor;

# Method-call interface to a generated extractor. The object is itself the
# blessed closure, so extracting means calling it on the text. $_[1] is
# handed over directly rather than copied, so the closure sees the caller's
# own variable, and the result comes back in the caller's context.
sub extract($$) # ($self, $text)
{
    my $extractor = $_[0];
    return $extractor->($_[1]);
}
package Text::Balanced::ErrorMsg;

# Error objects are hashes with 'error' and 'pos' entries; in string
# context they read "<error>, detected at offset <pos>".
use overload '""' => sub {
    my ($failure) = @_;
    return "$failure->{error}, detected at offset $failure->{pos}";
};

1;
__END__
=head1 NAME
Text::Balanced - Extract delimited text sequences from strings.
=head1 SYNOPSIS
use Text::Balanced qw (
extract_delimited
extract_bracketed
extract_quotelike
extract_codeblock
extract_variable
extract_tagged
extract_multiple
gen_delimited_pat
gen_extract_tagged
);
# Extract the initial substring of $text that is delimited by
# two (unescaped) instances of the first character in $delim.
($extracted, $remainder) = extract_delimited($text,$delim);
# Extract the initial substring of $text that is bracketed
# with a delimiter(s) specified by $delim (where the string
# in $delim contains one or more of '(){}[]<>').
($extracted, $remainder) = extract_bracketed($text,$delim);
# Extract the initial substring of $text that is bounded by
# an XML tag.
($extracted, $remainder) = extract_tagged($text);
# Extract the initial substring of $text that is bounded by
# a C<BEGIN>...C<END> pair. Don't allow nested C<BEGIN> tags
($extracted, $remainder) =
extract_tagged($text,"BEGIN","END",undef,{reject=>["BEGIN"]});
# Extract the initial substring of $text that represents a
# Perl "quote or quote-like operation"
($extracted, $remainder) = extract_quotelike($text);
# Extract the initial substring of $text that represents a block
# of Perl code, bracketed by any of character(s) specified by $delim
# (where the string $delim contains one or more of '(){}[]<>').
($extracted, $remainder) = extract_codeblock($text,$delim);
# Extract the initial substrings of $text that would be extracted by
# one or more sequential applications of the specified functions
# or regular expressions
@extracted = extract_multiple($text,
[ \&extract_bracketed,
\&extract_quotelike,
\&some_other_extractor_sub,
qr/[xyz]*/,
'literal',
]);
# Create a string representing an optimized pattern (a la Friedl)
# that matches a substring delimited by any of the specified characters
# (in this case: any type of quote or a slash)
$patstring = gen_delimited_pat(q{'"`/});
# Generate a reference to an anonymous sub that is just like extract_tagged
# but pre-compiled and optimized for a specific pair of tags, and consequently
# much faster (i.e. 3 times faster). It uses qr// for better performance on
# repeated calls, so it only works under Perl 5.005 or later.
$extract_head = gen_extract_tagged('<HEAD>','</HEAD>');
($extracted, $remainder) = $extract_head->($text);
=head1 DESCRIPTION
The various C<extract_...> subroutines may be used to
extract a delimited substring, possibly after skipping a
specified prefix string. By default, that prefix is
optional whitespace (C</\s*/>), but you can change it to whatever
you wish (see below).
The substring to be extracted must appear at the
current C<pos> location of the string's variable
(or at index zero, if no C<pos> position is defined).
In other words, the C<extract_...> subroutines I<don't>
extract the first occurrence of a substring anywhere
in a string (like an unanchored regex would). Rather,
they extract an occurrence of the substring appearing
immediately at the current matching position in the
string (like a C<\G>-anchored regex would).
=head2 General behaviour in list contexts
In a list context, all the subroutines return a list, the first three
elements of which are always:
=over 4
=item [0]
The extracted string, including the specified delimiters.
If the extraction fails C<undef> is returned.
=item [1]
The remainder of the input string (i.e. the characters after the
extracted string). On failure, the entire string is returned.
=item [2]
The skipped prefix (i.e. the characters before the extracted string).
On failure, C<undef> is returned.
=back
Note that in a list context, the contents of the original input text (the first
argument) are not modified in any way.
However, if the input text was passed in a variable, that variable's
C<pos> value is updated to point at the first character after the
extracted text. That means that in a list context the various
subroutines can be used much like regular expressions. For example:
while ( $next = (extract_quotelike($text))[0] )
{
# process next quote-like (in $next)
}
=head2 General behaviour in scalar and void contexts
In a scalar context, the extracted string is returned, having first been
removed from the input text. Thus, the following code also processes
each quote-like operation, but actually removes them from $text:
while ( $next = extract_quotelike($text) )
{
# process next quote-like (in $next)
}
Note that if the input text is a read-only string (i.e. a literal),
no attempt is made to remove the extracted text.
In a void context the behaviour of the extraction subroutines is
exactly the same as in a scalar context, except (of course) that the
extracted substring is not returned.
=head2 A note about prefixes
Prefix patterns are matched without any trailing modifiers (C</gimsox> etc.)
This can bite you if you're expecting a prefix specification like
'.*?(?=<H1>)' to skip everything up to the first <H1> tag. Such a prefix
pattern will only succeed if the <H1> tag is on the current line, since
. normally doesn't match newlines.
To overcome this limitation, you need to turn on /s matching within
the prefix pattern, using the C<(?s)> directive: '(?s).*?(?=<H1>)'
=head2 C<extract_delimited>
The C<extract_delimited> function formalizes the common idiom
of extracting a single-character-delimited substring from the start of
a string. For example, to extract a single-quote delimited string, the
following code is typically used:
($remainder = $text) =~ s/\A('(\\.|[^'])*')//s;
$extracted = $1;
but with C<extract_delimited> it can be simplified to:
($extracted,$remainder) = extract_delimited($text, "'");
C<extract_delimited> takes up to four scalars (the input text, the
delimiters, a prefix pattern to be skipped, and any escape characters)
and extracts the initial substring of the text that
is appropriately delimited. If the delimiter string has multiple
characters, the first one encountered in the text is taken to delimit
the substring.
The third argument specifies a prefix pattern that is to be skipped
(but must be present!) before the substring is extracted.
The final argument specifies the escape character to be used for each
delimiter.
All arguments are optional. If the escape characters are not specified,
every delimiter is escaped with a backslash (C<\>).
If the prefix is not specified, the
pattern C<'\s*'> - optional whitespace - is used. If the delimiter set
is also not specified, the set C</["'`]/> is used. If the text to be processed
is not specified either, C<$_> is used.
In list context, C<extract_delimited> returns an array of three
elements, the extracted substring (I<including the surrounding
delimiters>), the remainder of the text, and the skipped prefix (if
any). If a suitable delimited substring is not found, the first
element of the array is the empty string, the second is the complete
original text, and the prefix returned in the third element is an
empty string.
In a scalar context, just the extracted substring is returned. In
a void context, the extracted substring (and any prefix) are simply
removed from the beginning of the first argument.
Examples:
# Remove a single-quoted substring from the very beginning of $text:
$substring = extract_delimited($text, "'", '');
# Remove a single-quoted Pascalish substring (i.e. one in which
# doubling the quote character escapes it) from the very
# beginning of $text:
$substring = extract_delimited($text, "'", '', "'");
# Extract a single- or double- quoted substring from the
# beginning of $text, optionally after some whitespace
# (note the list context to protect $text from modification):
($substring) = extract_delimited $text, q{"'};
# Delete the substring delimited by the first '/' in $text:
$text = join '', (extract_delimited($text,'/','[^/]*'))[2,1];
Note that this last example is I<not> the same as deleting the first
quote-like pattern. For instance, if C<$text> contained the string:
"if ('./cmd' =~ m/$UNIXCMD/s) { $cmd = $1; }"
then after the deletion it would contain:
"if ('.$UNIXCMD/s) { $cmd = $1; }"
not:
"if ('./cmd' =~ ms) { $cmd = $1; }"
See L<"extract_quotelike"> for a (partial) solution to this problem.
=head2 C<extract_bracketed>
Like C<"extract_delimited">, the C<extract_bracketed> function takes
up to three optional scalar arguments: a string to extract from, a delimiter
specifier, and a prefix pattern. As before, a missing prefix defaults to
optional whitespace and a missing text defaults to C<$_>. However, a missing
delimiter specifier defaults to C<'{}()[]E<lt>E<gt>'> (see below).
C<extract_bracketed> extracts a balanced-bracket-delimited
substring (using any one (or more) of the user-specified delimiter
brackets: '(..)', '{..}', '[..]', or '<..>'). Optionally it will also
respect quoted unbalanced brackets (see below).
A "delimiter bracket" is a bracket in the list of delimiters passed as
C<extract_bracketed>'s second argument. Delimiter brackets are
specified by giving either the left or right (or both!) versions
of the required bracket(s). Note that the order in which
two or more delimiter brackets are specified is not significant.
A "balanced-bracket-delimited substring" is a substring bounded by
matched brackets, such that any other (left or right) delimiter
bracket I<within> the substring is also matched by an opposite
(right or left) delimiter bracket I<at the same level of nesting>. Any
type of bracket not in the delimiter list is treated as an ordinary
character.
In other words, each type of bracket specified as a delimiter must be
balanced and correctly nested within the substring, and any other kind of
("non-delimiter") bracket in the substring is ignored.
For example, given the string:
$text = "{ an '[irregularly :-(] {} parenthesized >:-)' string }";
then a call to C<extract_bracketed> in a list context:
@result = extract_bracketed( $text, '{}' );
would return:
( "{ an '[irregularly :-(] {} parenthesized >:-)' string }" , "" , "" )
since both sets of C<'{..}'> brackets are properly nested and evenly balanced.
(In a scalar context just the first element of the array would be returned. In
a void context, C<$text> would be replaced by an empty string.)
Likewise the call in:
@result = extract_bracketed( $text, '{[' );
would return the same result, since all sets of both types of specified
delimiter brackets are correctly nested and balanced.
However, the call in:
@result = extract_bracketed( $text, '{([<' );
would fail, returning:
( undef , "{ an '[irregularly :-(] {} parenthesized >:-)' string }" );
because the embedded pairs of C<'(..)'>s and C<'[..]'>s are "cross-nested" and
the embedded C<'E<gt>'> is unbalanced. (In a scalar context, this call would
return an empty string. In a void context, C<$text> would be unchanged.)
Note that the embedded single-quotes in the string don't help in this
case, since they have not been specified as acceptable delimiters and are
therefore treated as non-delimiter characters (and ignored).
However, if a particular species of quote character is included in the
delimiter specification, then that type of quote will be correctly handled.
For example, if C<$text> is:
$text = '<A HREF=">>>>">link</A>';
then
@result = extract_bracketed( $text, '<">' );
returns:
( '<A HREF=">>>>">', 'link</A>', "" )
as expected. Without the specification of C<"> as an embedded quoter:
@result = extract_bracketed( $text, '<>' );
the result would be:
( '<A HREF=">', '>>>">link</A>', "" )
In addition to the quote delimiters C<'>, C<">, and C<`>, full Perl quote-like
quoting (i.e. q{string}, qq{string}, etc) can be specified by including the
letter 'q' as a delimiter. Hence:
@result = extract_bracketed( $text, '<q>' );
would correctly match something like this:
$text = '<leftop: conj /and/ conj>';
See also: C<"extract_quotelike"> and C<"extract_codeblock">.
=head2 C<extract_variable>
C<extract_variable> extracts any valid Perl variable or
variable-involved expression, including scalars, arrays, hashes, array
accesses, hash look-ups, method calls through objects, subroutine calls
through subroutine references, etc.
The subroutine takes up to two optional arguments:
=over 4
=item 1.
A string to be processed (C<$_> if the string is omitted or C<undef>)
=item 2.
A string specifying a pattern to be matched as a prefix (which is to be
skipped). If omitted, optional whitespace is skipped.
=back
On success in a list context, an array of 3 elements is returned. The
elements are:
=over 4
=item [0]
the extracted variable, or variablish expression
=item [1]
the remainder of the input text,
=item [2]
the prefix substring (if any),
=back
On failure, all of these values (except the remaining text) are C<undef>.
In a scalar context, C<extract_variable> returns just the complete
substring that matched a variablish expression. C<undef> is returned on
failure. In addition, the original input text has the returned substring
(and any prefix) removed from it.
In a void context, the input text just has the matched substring (and
any specified prefix) removed.
=head2 C<extract_tagged>
C<extract_tagged> extracts and segments text between (balanced)
specified tags.
The subroutine takes up to five optional arguments:
=over 4
=item 1.
A string to be processed (C<$_> if the string is omitted or C<undef>)
=item 2.
A string specifying a pattern to be matched as the opening tag.
If the pattern string is omitted (or C<undef>) then a pattern
that matches any standard XML tag is used.
=item 3.
A string specifying a pattern to be matched at the closing tag.
If the pattern string is omitted (or C<undef>) then the closing
tag is constructed by inserting a C</> after any leading bracket
characters in the actual opening tag that was matched (I<not> the pattern
that matched the tag). For example, if the opening tag pattern
is specified as C<'{{\w+}}'> and actually matched the opening tag
C<"{{DATA}}">, then the constructed closing tag would be C<"{{/DATA}}">.
=item 4.
A string specifying a pattern to be matched as a prefix (which is to be
skipped). If omitted, optional whitespace is skipped.
=item 5.
A hash reference containing various parsing options (see below)
=back
The various options that can be specified are:
=over 4
=item C<reject =E<gt> $listref>
The list reference contains one or more strings specifying patterns
that must I<not> appear within the tagged text.
For example, to extract
an HTML link (which should not contain nested links) use:
extract_tagged($text, '<A>', '</A>', undef, {reject => ['<A>']} );
=item C<ignore =E<gt> $listref>
The list reference contains one or more strings specifying patterns
that are I<not> to be treated as nested tags within the tagged text
(even if they would match the start tag pattern).
For example, to extract an arbitrary XML tag, but ignore "empty" elements:
extract_tagged($text, undef, undef, undef, {ignore => ['<[^>]*/>']} );
(also see L<"gen_delimited_pat"> below).
=item C<fail =E<gt> $str>
The C<fail> option indicates the action to be taken if a matching end
tag is not encountered (i.e. before the end of the string or some
C<reject> pattern matches). By default, a failure to match a closing
tag causes C<extract_tagged> to immediately fail.
However, if the string value associated with C<fail> is "MAX", then
C<extract_tagged> returns the complete text up to the point of failure.
If the string is "PARA", C<extract_tagged> returns only the first paragraph
after the tag (up to the first line that is either empty or contains
only whitespace characters).
If the string is "", the default behaviour (i.e. failure) is reinstated.
For example, suppose the start tag "/para" introduces a paragraph, which then
continues until the next "/endpara" tag or until another "/para" tag is
encountered:
$text = "/para line 1\n\nline 3\n/para line 4";
extract_tagged($text, '/para', '/endpara', undef,
{reject => '/para', fail => 'MAX'} );
# EXTRACTED: "/para line 1\n\nline 3\n"
Suppose instead, that if no matching "/endpara" tag is found, the "/para"
tag refers only to the immediately following paragraph:
$text = "/para line 1\n\nline 3\n/para line 4";
extract_tagged($text, '/para', '/endpara', undef,
{reject => '/para', fail => 'PARA'} );
# EXTRACTED: "/para line 1\n"
Note that the specified C<fail> behaviour applies to nested tags as well.
=back
On success in a list context, an array of 6 elements is returned. The elements are:
=over 4
=item [0]
the extracted tagged substring (including the outermost tags),
=item [1]
the remainder of the input text,
=item [2]
the prefix substring (if any),
=item [3]
the opening tag
=item [4]
the text between the opening and closing tags
=item [5]
the closing tag (or "" if no closing tag was found)
=back
On failure, all of these values (except the remaining text) are C<undef>.
In a scalar context, C<extract_tagged> returns just the complete
substring that matched a tagged text (including the start and end
tags). C<undef> is returned on failure. In addition, the original input
text has the returned substring (and any prefix) removed from it.
In a void context, the input text just has the matched substring (and
any specified prefix) removed.
=head2 C<gen_extract_tagged>
(Note: This subroutine is only available under Perl 5.005 or later.)
C<gen_extract_tagged> generates a new anonymous subroutine which
extracts text between (balanced) specified tags. In other words,
it generates a function identical in function to C<extract_tagged>.
The difference between C<extract_tagged> and the anonymous
subroutines generated by
C<gen_extract_tagged>, is that those generated subroutines:
=over 4
=item *
do not have to reparse tag specification or parsing options every time
they are called (whereas C<extract_tagged> has to effectively rebuild
its tag parser on every call);
=item *
make use of the new qr// construct to pre-compile the regexes they use
(whereas C<extract_tagged> uses standard string variable interpolation
to create tag-matching patterns).
=back
The subroutine takes up to four optional arguments (the same set as
C<extract_tagged> except for the string to be processed). It returns
a reference to a subroutine which in turn takes a single argument (the text to
be extracted from).
In other words, the implementation of C<extract_tagged> is exactly
equivalent to:
sub extract_tagged
{
my $text = shift;
$extractor = gen_extract_tagged(@_);
return $extractor->($text);
}
(although C<extract_tagged> is not currently implemented that way, in order
to preserve pre-5.005 compatibility).
Using C<gen_extract_tagged> to create extraction functions for specific tags
is a good idea if those functions are going to be called more than once, since
their performance is typically twice as good as the more general-purpose
C<extract_tagged>.
=head2 C<extract_quotelike>
C<extract_quotelike> attempts to recognize, extract, and segment any
one of the various Perl quotes and quotelike operators (see
L<perlop(3)>). Nested backslashed delimiters, embedded balanced bracket
delimiters (for the quotelike operators), and trailing modifiers are
all caught. For example, in:
extract_quotelike 'q # an octothorpe: \# (not the end of the q!) #'

extract_quotelike ' "You said, \"Use sed\"." '
extract_quotelike ' s{([A-Z]{1,8}\.[A-Z]{3})} /\L$1\E/; '
extract_quotelike ' tr/\\\/\\\\/\\\//ds; '
the full Perl quotelike operations are all extracted correctly.
Note too that, when using the /x modifier on a regex, any comment
containing the current pattern delimiter will cause the regex to be
immediately terminated. In other words:
'm /
(?i) # CASE INSENSITIVE
[a-z_] # LEADING ALPHABETIC/UNDERSCORE
[a-z0-9]* # FOLLOWED BY ANY NUMBER OF ALPHANUMERICS
/x'
will be extracted as if it were:
'm /
(?i) # CASE INSENSITIVE
[a-z_] # LEADING ALPHABETIC/'
This behaviour is identical to that of the actual compiler.
C<extract_quotelike> takes two arguments: the text to be processed and
a prefix to be matched at the very beginning of the text. If no prefix
is specified, optional whitespace is the default. If no text is given,
C<$_> is used.
In a list context, an array of 11 elements is returned. The elements are:
=over 4
=item [0]
the extracted quotelike substring (including trailing modifiers),
=item [1]
the remainder of the input text,
=item [2]
the prefix substring (if any),
=item [3]
the name of the quotelike operator (if any),
=item [4]
the left delimiter of the first block of the operation,
=item [5]
the text of the first block of the operation
(that is, the contents of
a quote, the regex of a match or substitution or the target list of a
translation),
=item [6]
the right delimiter of the first block of the operation,
=item [7]
the left delimiter of the second block of the operation
(that is, if it is a C<s>, C<tr>, or C<y>),
=item [8]
the text of the second block of the operation
(that is, the replacement of a substitution or the translation list
of a translation),
=item [9]
the right delimiter of the second block of the operation (if any),
=item [10]
the trailing modifiers on the operation (if any).
=back
For each of the fields marked "(if any)" the default value on success is
an empty string.
On failure, all of these values (except the remaining text) are C<undef>.
In a scalar context, C<extract_quotelike> returns just the complete substring
that matched a quotelike operation (or C<undef> on failure). In a scalar or
void context, the input text has the same substring (and any specified
prefix) removed.
Examples:
# Remove the first quotelike literal that appears in text
$quotelike = extract_quotelike($text,'.*?');
# Replace one or more leading whitespace-separated quotelike
# literals in $_ with "<QLL>"
do { $_ = join '<QLL>', (extract_quotelike)[2,1] } until $@;
# Isolate the search pattern in a quotelike operation from $text
($op,$pat) = (extract_quotelike $text)[3,5];
if ($op =~ /[ms]/)
{
print "search pattern: $pat\n";
}
else
{
print "$op is not a pattern matching operation\n";
}
=head2 C<extract_quotelike> and "here documents"
C<extract_quotelike> can successfully extract "here documents" from an input
string, but with an important caveat in list contexts.
Unlike other types of quote-like literals, a here document is rarely
a contiguous substring. For example, a typical piece of code using
here document might look like this:
<<'EOMSG' || die;
This is the message.
EOMSG
exit;
Given this as an input string in a scalar context, C<extract_quotelike>
would correctly return the string "<<'EOMSG'\nThis is the message.\nEOMSG",
leaving the string " || die;\nexit;" in the original variable. In other words,
the two separate pieces of the here document are successfully extracted and
concatenated.
In a list context, C<extract_quotelike> would return the list
=over 4
=item [0]
"<<'EOMSG'\nThis is the message.\nEOMSG\n" (i.e. the full extracted here document,
including fore and aft delimiters),
=item [1]
" || die;\nexit;" (i.e. the remainder of the input text, concatenated),
=item [2]
"" (i.e. the prefix substring -- trivial in this case),
=item [3]
"<<" (i.e. the "name" of the quotelike operator),
=item [4]
"'EOMSG'" (i.e. the left delimiter of the here document, including any quotes),
=item [5]
"This is the message.\n" (i.e. the text of the here document),
=item [6]
"EOMSG" (i.e. the right delimiter of the here document),
=item [7..10]
"" (a here document has no second left delimiter, second text, second right
delimiter, or trailing modifiers).
=back
However, the matching position of the input variable would be set to
"exit;" (i.e. I<after> the closing delimiter of the here document),
which would cause the earlier " || die;\nexit;" to be skipped in any
sequence of code fragment extractions.
To avoid this problem, when it encounters a here document whilst
extracting from a modifiable string, C<extract_quotelike> silently
rearranges the string to an equivalent piece of Perl:
<<'EOMSG'
This is the message.
EOMSG
|| die;
exit;
in which the here document I<is> contiguous. It still leaves the
matching position after the here document, but now the rest of the line
on which the here document starts is not skipped.
To prevent C<extract_quotelike> from mucking about with the input in this way
(this is the only case where a list-context C<extract_quotelike> does so),
you can pass the input variable as an interpolated literal:
$quotelike = extract_quotelike("$var");
=head2 C<extract_codeblock>
C<extract_codeblock> attempts to recognize and extract a balanced
bracket delimited substring that may contain unbalanced brackets
inside Perl quotes or quotelike operations. That is, C<extract_codeblock>
is like a combination of C<"extract_bracketed"> and
C<"extract_quotelike">.
C<extract_codeblock> takes the same initial three parameters as
C<extract_bracketed>:
a text to process, a set of delimiter brackets to look for, and a prefix to
match first. It also takes an optional fourth parameter, which allows the
outermost delimiter brackets to be specified separately (see below).
Omitting the first argument (input text) means process C<$_> instead.
Omitting the second argument (delimiter brackets) indicates that only C<'{'> is
to be used.
Omitting the third argument (prefix argument) implies optional whitespace at the
start.
Omitting the fourth argument (outermost delimiter brackets) indicates that the
value of the second argument is to be used for the outermost delimiters.
Once the prefix and the outermost opening delimiter bracket have been
recognized, code blocks are extracted by stepping through the input text and
trying the following alternatives in sequence:
=over 4
=item 1.
Try and match a closing delimiter bracket. If the bracket was the same
species as the last opening bracket, return the substring to that
point. If the bracket was mismatched, return an error.
=item 2.
Try to match a quote or quotelike operator. If found, call
C<extract_quotelike> to eat it. If C<extract_quotelike> fails, return
the error it returned. Otherwise go back to step 1.
=item 3.
Try to match an opening delimiter bracket. If found, call
C<extract_codeblock> recursively to eat the embedded block. If the
recursive call fails, return an error. Otherwise, go back to step 1.
=item 4.
Unconditionally match a bareword or any other single character, and
then go back to step 1.
=back
Examples:
# Find a while loop in the text
if ($text =~ s/.*?while\s*\{/{/)
{
$loop = "while " . extract_codeblock($text);
}
# Remove the first round-bracketed list (which may include
# round- or curly-bracketed code blocks or quotelike operators)
extract_codeblock $text, "(){}", '[^(]*';
The ability to specify a different outermost delimiter bracket is useful
in some circumstances. For example, in the Parse::RecDescent module,
parser actions which are to be performed only on a successful parse
are specified using a C<E<lt>defer:...E<gt>> directive. For example:
sentence: subject verb object
<defer: {$::theVerb = $item{verb}} >
Parse::RecDescent uses C<extract_codeblock($text, '{}E<lt>E<gt>')> to extract
the code
within the C<E<lt>defer:...E<gt>> directive, but there's a problem.
A deferred action like this:
<defer: {if ($count>10) {$count--}} >
will be incorrectly parsed as:
<defer: {if ($count>
because the "less than" operator is interpreted as a closing delimiter.
But, by extracting the directive using
S<C<extract_codeblock($text, '{}', undef, 'E<lt>E<gt>')>>
the '>' character is only treated as a delimiter at the outermost
level of the code block, so the directive is parsed correctly.
=head2 C<extract_multiple>
The C<extract_multiple> subroutine takes a string to be processed and a
list of extractors (subroutines or regular expressions) to apply to that string.
In an array context C<extract_multiple> returns an array of substrings
of the original string, as extracted by the specified extractors.
In a scalar context, C<extract_multiple> returns the first
substring successfully extracted from the original string. In both
scalar and void contexts the original string has the first successfully
extracted substring removed from it. In all contexts
C<extract_multiple> starts at the current C<pos> of the string, and
sets that C<pos> appropriately after it matches.
Hence, the aim of a call to C<extract_multiple> in a list context
is to split the processed string into as many non-overlapping fields as
possible, by repeatedly applying each of the specified extractors
to the remainder of the string. Thus C<extract_multiple> is
a generalized form of Perl's C<split> subroutine.
The subroutine takes up to four optional arguments:
=over 4
=item 1.
A string to be processed (C<$_> if the string is omitted or C<undef>)
=item 2.
A reference to a list of subroutine references and/or qr// objects and/or
literal strings and/or hash references, specifying the extractors
to be used to split the string. If this argument is omitted (or
C<undef>) the list:
[
sub { extract_variable($_[0], '') },
sub { extract_quotelike($_[0],'') },
sub { extract_codeblock($_[0],'{}','') },
]
is used.
=item 3.
A number specifying the maximum number of fields to return. If this
argument is omitted (or C<undef>), split continues as long as possible.
If the third argument is I<N>, then extraction continues until I<N> fields
have been successfully extracted, or until the string has been completely
processed.
Note that in scalar and void contexts the value of this argument is
automatically reset to 1 (under C<-w>, a warning is issued if the argument
has to be reset).
=item 4.
A value indicating whether unmatched substrings (see below) within the
text should be skipped or returned as fields. If the value is true,
such substrings are skipped. Otherwise, they are returned.
=back
The extraction process works by applying each extractor in
sequence to the text string.
If the extractor is a subroutine it is called in a list context and is
expected to return a list of a single element, namely the extracted
text. It may optionally also return two further arguments: a string
representing the text left after extraction (like $' for a pattern
match), and a string representing any prefix skipped before the
extraction (like $` in a pattern match). Note that this is designed
to facilitate the use of other Text::Balanced subroutines with
C<extract_multiple>. Note too that the value returned by an extractor
subroutine need not bear any relationship to the corresponding substring
of the original text (see examples below).
If the extractor is a precompiled regular expression or a string,
it is matched against the text in a scalar context with a leading
'\G' and the gc modifiers enabled. The extracted value is either
$1 if that variable is defined after the match, or else the
complete match (i.e. $&).
If the extractor is a hash reference, it must contain exactly one element.
The value of that element is one of the
above extractor types (subroutine reference, regular expression, or string).
The key of that element is the name of a class into which the successful
return value of the extractor will be blessed.
If an extractor returns a defined value, that value is immediately
treated as the next extracted field and pushed onto the list of fields.
If the extractor was specified in a hash reference, the field is also
blessed into the appropriate class.
If the extractor fails to match (in the case of a regex extractor), or returns
an empty list or an undefined value (in the case of a subroutine extractor),
it is assumed to have failed to extract.
If none of the extractor subroutines succeeds, then one
character is extracted from the start of the text and the extraction
subroutines reapplied. Characters which are thus removed are accumulated and
eventually become the next field (unless the fourth argument is true, in which
case they are discarded).
For example, the following extracts substrings that are valid Perl variables:
@fields = extract_multiple($text,
[ sub { extract_variable($_[0]) } ],
undef, 1);
This example separates a text into fields which are quote delimited,
curly bracketed, and anything else. The delimited and bracketed
parts are also blessed to identify them (the "anything else" is unblessed):
@fields = extract_multiple($text,
[
{ Delim => sub { extract_delimited($_[0],q{'"}) } },
{ Brack => sub { extract_bracketed($_[0],'{}') } },
]);
This call extracts the next single substring that is a valid Perl quotelike
operator (and removes it from $text):
$quotelike = extract_multiple($text,
[
sub { extract_quotelike($_[0]) },
], undef, 1);
Finally, here is yet another way to do comma-separated value parsing:
@fields = extract_multiple($csv_text,
[
sub { extract_delimited($_[0],q{'"}) },
qr/([^,]+)(.*)/,
],
undef,1);
The list in the second argument means:
I<"Try and extract a ' or " delimited string, otherwise extract anything up to a
comma...">.
The undef third argument means:
I<"...as many times as possible...">,
and the true value in the fourth argument means
I<"...discarding anything else that appears (i.e. the commas)">.
If you wanted the commas preserved as separate fields (i.e. like split
does if your split pattern has capturing parentheses), you would
just make the last parameter undefined (or remove it).
=head2 C<gen_delimited_pat>
The C<gen_delimited_pat> subroutine takes a single (string) argument and
builds a Friedl-style optimized regex that matches a string delimited
by any one of the characters in the single argument. For example:
gen_delimited_pat(q{'"})
returns the regex:
(?:\"(?:\\\"|(?!\").)*\"|\'(?:\\\'|(?!\').)*\')
Note that the specified delimiters are automatically quotemeta'd.
A typical use of C<gen_delimited_pat> would be to build special purpose tags
for C<extract_tagged>. For example, to properly ignore "empty" XML elements
(which might contain quoted strings):
my $empty_tag = '<(' . gen_delimited_pat(q{'"}) . '|.)+/>';
extract_tagged($text, undef, undef, undef, {ignore => [$empty_tag]} );
C<gen_delimited_pat> may also be called with an optional second argument,
which specifies the "escape" character(s) to be used for each delimiter.
For example to match a Pascal-style string (where ' is the delimiter
and '' is a literal ' within the string):
gen_delimited_pat(q{'},q{'});
Different escape characters can be specified for different delimiters.
For example, to specify that '/' is the escape for single quotes
and '%' is the escape for double quotes:
gen_delimited_pat(q{'"},q{/%});
If more delimiters than escape chars are specified, the last escape char
is used for the remaining delimiters.
If no escape char is specified for a given specified delimiter, '\' is used.
=head2 C<delimited_pat>
Note that C<gen_delimited_pat> was previously called C<delimited_pat>.
That name may still be used, but is now deprecated.

=head1 DIAGNOSTICS
In a list context, all the functions return C<(undef,$original_text)>
on failure. In a scalar context, failure is indicated by returning C<undef>
(in this case the input text is not modified in any way).
In addition, on failure in I<any> context, the C<$@> variable is set.
Accessing C<$@-E<gt>{error}> returns one of the error diagnostics listed
below.
Accessing C<$@-E<gt>{pos}> returns the offset into the original string at
which the error was detected (although not necessarily where it occurred!)
Printing C<$@> directly produces the error message, with the offset appended.
On success, the C<$@> variable is guaranteed to be C<undef>.
The available diagnostics are:
=over 4
=item C<Did not find a suitable bracket: "%s">
The delimiter provided to C<extract_bracketed> was not one of
C<'()[]E<lt>E<gt>{}'>.
=item C<Did not find prefix: /%s/>
A non-optional prefix was specified but wasn't found at the start of the text.
=item C<Did not find opening bracket after prefix: "%s">
C<extract_bracketed> or C<extract_codeblock> was expecting a
particular kind of bracket at the start of the text, and didn't find it.
=item C<No quotelike operator found after prefix: "%s">
C<extract_quotelike> didn't find one of the quotelike operators C<q>,
C<qq>, C<qw>, C<qx>, C<s>, C<tr> or C<y> at the start of the substring
it was extracting.
=item C<Unmatched closing bracket: "%c">
C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> encountered
a closing bracket where none was expected.
=item C<Unmatched opening bracket(s): "%s">
C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> ran
out of characters in the text before closing one or more levels of nested
brackets.
=item C<Unmatched embedded quote (%s)>
C<extract_bracketed> attempted to match an embedded quoted substring, but
failed to find a closing quote to match it.
=item C<Did not find closing delimiter to match '%s'>
C<extract_quotelike> was unable to find a closing delimiter to match the
one that opened the quote-like operation.
=item C<Mismatched closing bracket: expected "%c" but found "%s">
C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> found
a valid bracket delimiter, but it was the wrong species. This usually
indicates a nesting error, but may indicate incorrect quoting or escaping.
=item C<No block delimiter found after quotelike "%s">
C<extract_quotelike> or C<extract_codeblock> found one of the
quotelike operators C<q>, C<qq>, C<qw>, C<qx>, C<s>, C<tr> or C<y>
without a suitable block after it.
=item C<Did not find leading dereferencer>
C<extract_variable> was expecting one of '$', '@', or '%' at the start of
a variable, but didn't find any of them.
=item C<Bad identifier after dereferencer>
C<extract_variable> found a '$', '@', or '%' indicating a variable, but that
character was not followed by a legal Perl identifier.
=item C<Did not find expected opening bracket at %s>
C<extract_codeblock> failed to find any of the outermost opening brackets
that were specified.
=item C<Improperly nested codeblock at %s>
A nested code block was found that started with a delimiter that was specified
as being only to be used as an outermost bracket.
=item C<Missing second block for quotelike "%s">
C<extract_codeblock> or C<extract_quotelike> found one of the
quotelike operators C<s>, C<tr> or C<y> followed by only one block.
=item C<No match found for opening bracket>
C<extract_codeblock> failed to find a closing bracket to match the outermost
opening bracket.
=item C<Did not find opening tag: /%s/>
C<extract_tagged> did not find a suitable opening tag (after any specified
prefix was removed).
=item C<Unable to construct closing tag to match: /%s/>
C<extract_tagged> matched the specified opening tag and tried to
modify the matched text to produce a matching closing tag (because
none was specified). It failed to generate the closing tag, almost
certainly because the opening tag did not start with a
bracket of some kind.
=item C<Found invalid nested tag: %s>
C<extract_tagged> found a nested tag that appeared in the "reject" list
(and the failure mode was not "MAX" or "PARA").
=item C<Found unbalanced nested tag: %s>
C<extract_tagged> found a nested opening tag that was not matched by a
corresponding nested closing tag (and the failure mode was not "MAX" or "PARA").
=item C<Did not find closing tag>
C<extract_tagged> reached the end of the text without finding a closing tag
to match the original opening tag (and the failure mode was not
"MAX" or "PARA").
=back
=head1 AUTHOR
Damian Conway (damian@conway.org)
=head1 BUGS AND IRRITATIONS
There are undoubtedly serious bugs lurking somewhere in this code, if
only because parts of it give the impression of understanding a great deal
more about Perl than they really do.
Bug reports and other feedback are most welcome.
=head1 COPYRIGHT
Copyright (c) 1997-2001, Damian Conway. All Rights Reserved.
This module is free software. It may be used, redistributed
and/or modified under the same terms as Perl itself.
package Text::ParseWords;
use strict;
require 5.006;
our $VERSION = "3.27";
use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
our @EXPORT_OK = qw(old_shellwords);
our $PERL_SINGLE_QUOTE;
# shellwords(@lines)
#
# Splits every line on runs of whitespace via parse_line('\s+', 0, ...), so
# quotes and backslashes are honoured and then removed from the words.
# Leading whitespace is trimmed from each line first, and a trailing
# undefined word (left by parse_line when a line ends in a delimiter) is
# dropped.
#
# Returns the words of all lines as one flat list, or the empty list as soon
# as a non-empty line produces no words.
sub shellwords {
    my @input = @_;    # work on copies: the trimming below edits in place
    my @collected;

    for my $text (@input) {
        $text =~ s/^\s+//;

        my @tokens = parse_line('\s+', 0, $text);
        if (@tokens && !defined $tokens[-1]) {
            pop @tokens;
        }

        # A non-empty line that yields nothing means parse_line gave up.
        if (!@tokens && length($text)) {
            return ();
        }

        push @collected, @tokens;
    }

    return @collected;
}
# quotewords($delim, $keep, @lines)
#
# Runs parse_line($delim, $keep, $line) over each line and concatenates the
# results into one flat list.
#
# Returns that list, or the empty list as soon as a non-empty line produces
# no pieces.
sub quotewords {
    my ($delim, $keep, @lines) = @_;
    my @collected;

    for my $text (@lines) {
        my @tokens = parse_line($delim, $keep, $text);

        # Nothing back from a non-empty line: abandon the whole call.
        return () if !@tokens && length($text);

        push @collected, @tokens;
    }

    return @collected;
}
# nested_quotewords($delim, $keep, @lines)
#
# Like quotewords, but keeps the per-line grouping: the result holds one
# array reference per input line, each containing the pieces parse_line
# produced for that line.
#
# Returns the list of array references, or the empty list as soon as a
# non-empty line produces no pieces.
sub nested_quotewords {
    my ($delim, $keep, @lines) = @_;
    my @rows;

    for my $text (@lines) {
        my @fields = parse_line($delim, $keep, $text);

        return () if !@fields && length($text);

        push @rows, \@fields;    # fresh array each iteration
    }

    return @rows;
}
# parse_line($delimiter, $keep, $line)
#
# Breaks $line into pieces separated by $delimiter, treating double- and
# single-quoted sections as part of the surrounding word.
#
#   $delimiter - regular expression source, interpolated into the pattern
#   $keep      - false: quotes are removed, backslash escapes are resolved
#                in unquoted and double-quoted text (and in single-quoted
#                text for \\ and \' when $PERL_SINGLE_QUOTE is true);
#                true: quotes and backslashes are kept as written;
#                the string 'delimiters': additionally, every matched
#                delimiter is returned as its own piece
#   $line      - the text to split (a private copy is consumed)
#
# Returns the list of pieces.  Returns the empty list when the pattern
# cannot match the remaining text or matches nothing at all.  If the text
# ends in a delimiter, the final piece is undef.
sub parse_line {
    my($delimiter, $keep, $line) = @_;
    my($word, @pieces);

    no warnings 'uninitialized';	# we will be testing undef strings

    while (length($line)) {
        # This pattern is optimised to be stack conservative on older perls.
        # Do not refactor without being careful and testing it on very long strings.
        # See Perl bug #42980 for an example of a stack busting input.
        $line =~ s/^
                    (?:
                        # double quoted string
                        (")                             # $quote
                        ((?>[^\\"]*(?:\\.[^\\"]*)*))"   # $quoted
                    |   # --OR--
                        # single quoted string
                        (')                             # $quote
                        ((?>[^\\']*(?:\\.[^\\']*)*))'   # $quoted
                    |   # --OR--
                        # unquoted string
                        (                               # $unquoted
                            (?:\\.|[^\\"'])*?
                        )
                        # followed by
                        (                               # $delim
                            \Z(?!\n)                    # EOL
                        |   # --OR--
                            (?-x:$delimiter)            # delimiter
                        |   # --OR--
                            (?!^)(?=["'])               # a quote
                        )
                    )//xs or return;		# extended layout

        # Captures 1-2 belong to the double-quoted branch, 3-4 to the
        # single-quoted branch, 5-6 to the unquoted branch.
        my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6);

        return() unless( defined($quote) || length($unquoted) || length($delim));

        if ($keep) {
            $quoted = "$quote$quoted$quote";
        }
        else {
            $unquoted =~ s/\\(.)/$1/sg;
            if (defined $quote) {
                $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
                $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
            }
        }
        $word .= substr($line, 0, 0);	# leave results tainted
        $word .= defined $quote ? $quoted : $unquoted;

        # A delimiter ends the current word.
        if (length($delim)) {
            push(@pieces, $word);
            push(@pieces, $delim) if ($keep eq 'delimiters');
            undef $word;
        }
        # End of input: flush whatever word is pending (possibly undef).
        if (!length($line)) {
            push(@pieces, $word);
        }
    }
    return(@pieces);
}
# old_shellwords(@lines)
#
# Usage:
#	use ParseWords;
#	@words = old_shellwords($line);
#	or
#	@words = old_shellwords(@lines);
#	or
#	@words = old_shellwords();	# defaults to $_ (and clobbers it)
#
# Splits shell-style text into words.  When arguments are given they are
# concatenated and processed through a localized $_; otherwise the caller's
# $_ is consumed.  Quoted sections and backslash escapes are joined onto the
# current word with the quotes and backslashes removed.
#
# Returns the list of words.  On an unmatched quote it issues a Carp::carp
# warning and returns the empty list.
sub old_shellwords {
    no warnings 'uninitialized';	# we will be testing undef strings
    local *_ = \join('', @_) if @_;

    my @fields;
    my $chunk;

    s/\A\s+//;
    until ($_ eq '') {
        my $current = substr($_, 0, 0);	# leave results tainted

        # Consume adjacent pieces until whitespace or end of text.
        while (1) {
            if (s/\A"(([^"\\]|\\.)*)"//s) {
                $chunk = $1;
                $chunk =~ s#\\(.)#$1#sg;
            }
            elsif (/\A"/) {
                require Carp;
                Carp::carp("Unmatched double quote: $_");
                return();
            }
            elsif (s/\A'(([^'\\]|\\.)*)'//s) {
                $chunk = $1;
                $chunk =~ s#\\(.)#$1#sg;
            }
            elsif (/\A'/) {
                require Carp;
                Carp::carp("Unmatched single quote: $_");
                return();
            }
            elsif (s/\A\\(.?)//s) {
                $chunk = $1;
            }
            elsif (s/\A([^\s\\'"]+)//) {
                $chunk = $1;
            }
            else {
                # Word boundary: skip the separating whitespace.
                s/\A\s+//;
                last;
            }
            $current .= $chunk;
        }

        push(@fields, $current);
    }

    return @fields;
}
1;
__END__
package Text::Tabs;
require Exporter;
@ISA = (Exporter);
@EXPORT = qw(expand unexpand $tabstop);
use vars qw($VERSION $tabstop $debug);
$VERSION = 2009.0305;
use strict;
# Package defaults, assigned inside BEGIN so they are set at compile time.
BEGIN {
$tabstop = 8; # tab-stop width in columns; read by expand() and unexpand()
$debug = 0; # when true, unexpand() prints each chunk it is about to compress
}
# expand(@strings)
#
# Replaces every tab in each string with enough spaces to reach the next
# multiple of $tabstop columns.  Columns are counted separately for each
# line of a multi-line string.
#
# Returns the expanded strings in list context, otherwise only the first.
sub expand {
    my @expanded;

    foreach my $text (@_) {
        my $result = '';

        # Split after each newline so column counting restarts per line.
        foreach my $segment (split(/^/m, $text, -1)) {
            # Extra columns introduced so far by earlier tabs on this line;
            # pos() still counts in the unexpanded text.
            my $shift = 0;
            $segment =~ s{\t}{
                my $width = $tabstop - (pos($segment) + $shift) % $tabstop;
                $shift += $width - 1;
                " " x $width;
            }eg;
            $result .= $segment;
        }

        push(@expanded, $result);
    }

    return wantarray ? @expanded : $expanded[0];
}
# unexpand(@strings)
#
# Expands each line with expand(), cuts it into $tabstop-wide cells and
# replaces the run of trailing spaces in each full cell with a single tab.
# The final partial cell is left alone unless it is exactly $tabstop spaces,
# in which case it becomes a tab.  With $debug set, each cell is printed
# before it is rewritten.
#
# Returns the rewritten strings in list context, otherwise only the first.
sub unexpand
{
    my @texts = @_;
    my $full_stop = " " x $tabstop;

    foreach my $text (@texts) {
        my @rows = split("\n", $text, -1);

        foreach my $row (@rows) {
            $row = expand($row);

            # The capturing split yields the $tabstop-wide cells (with empty
            # strings between them) followed by the leftover tail.
            my @cells = split(/(.{$tabstop})/, $row, -1);
            my $tail = pop(@cells);
            $tail = '' unless defined $tail;
            $tail = "\t" if $tail eq $full_stop;

            foreach my $cell (@cells) {
                if ($debug) {
                    my $shown = $cell;
                    $shown =~ s/\t/^I\t/gs;
                    print "sub on '$shown'\n";
                }
                $cell =~ s/  +$/\t/;
            }

            $row = join('', @cells, $tail);
        }

        $text = join("\n", @rows);
    }

    return wantarray ? @texts : $texts[0];
}
1;
__END__
# Alternative expand() built on a single repeated substitution.  It follows
# the __END__ marker, so it is not compiled as part of the module
# (presumably retained for reference).
#
# Each pass rewrites the first run of tabs on a line: the run is replaced by
# $tabstop spaces per tab, minus the columns already used by the preceding
# tab-free text modulo $tabstop.  The loop repeats until no tab is left.
sub expand
{
my (@l) = @_;
for $_ (@l) {
1 while s/(^|\n)([^\t\n]*)(\t+)/
$1. $2 . (" " x
($tabstop * length($3)
- (length($2) % $tabstop)))
/sex;
}
return @l if wantarray;
return $l[0];
}
package Text::Wrap;
use warnings::register;
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(wrap fill);
@EXPORT_OK = qw($columns $break $huge);
$VERSION = 2009.0305;
use vars qw($VERSION $columns $debug $break $huge $unexpand $tabstop
$separator $separator2);
use strict;
# Package defaults read by wrap(), assigned inside BEGIN so they are set at
# compile time.
BEGIN {
$columns = 76; # <= screen width
$debug = 0; # when true, wrap() prints its intermediate result
$break = '\s'; # regex source for the text a line may be broken at
$huge = 'wrap'; # alternatively: 'die' or 'overflow'
$unexpand = 1; # when true, each output line is passed through unexpand()
$tabstop = 8; # copied into $Text::Tabs::tabstop for the duration of wrap()
$separator = "\n"; # string placed between output lines
$separator2 = undef; # if defined, used instead of $separator, except that a lone "\n" from the text is kept
}
use Text::Tabs qw(expand unexpand);
# wrap($ip, $xp, @t)
#
# Joins the pieces of @t into one string and re-flows it into lines.
#
#   $ip - prefix placed before the first output line
#   $xp - prefix placed before every following output line
#   @t  - text pieces; every piece except the last gets one space appended
#         unless it already ends in whitespace
#
# Line length and breaking are governed by the package variables $columns,
# $break, $huge, $unexpand, $tabstop, $separator and $separator2.  Prefix
# plus text of a line is limited to $columns - 1 characters.
#
# Returns the wrapped text as a single string.  Dies with "couldn't wrap ..."
# when $huge is 'die' and no break fits.  May raise $columns (with a
# warning) when the subsequent-line prefix leaves no room for text.
sub wrap
{
    my ($ip, $xp, @t) = @_;

    local($Text::Tabs::tabstop) = $tabstop;
    my $r = "";
    my $tail = pop(@t);
    my $t = expand(join("", (map { /\s+\z/ ? ( $_ ) : ($_, ' ') } @t), $tail));
    my $lead = $ip;

    # Room for text on the second and later lines.
    my $nll = $columns - length(expand($xp)) - 1;
    if ($nll <= 0 && $xp ne '') {
        my $nc = length(expand($xp)) + 2;
        warnings::warnif "Increasing \$Text::Wrap::columns from $columns to $nc to accommodate length of subsequent tab";
        $columns = $nc;
        $nll = 1;
    }

    # Room for text on the first line.
    my $ll = $columns - length(expand($ip)) - 1;
    $ll = 0 if $ll < 0;
    my $nl = "";
    my $remainder = "";

    use re 'taint';

    pos($t) = 0;
    # Keep emitting lines until only break characters remain.
    while ($t !~ /\G(?:$break)*\Z/gc) {
        if ($t =~ /\G([^\n]{0,$ll})($break|\n+|\z)/xmgc) {
            # Normal case: a chunk that fits, ending at a break.
            $r .= $unexpand
                ? unexpand($nl . $lead . $1)
                : $nl . $lead . $1;
            $remainder = $2;
        } elsif ($huge eq 'wrap' && $t =~ /\G([^\n]{$ll})/gc) {
            # Over-long word: cut it at the line width.
            $r .= $unexpand
                ? unexpand($nl . $lead . $1)
                : $nl . $lead . $1;
            $remainder = defined($separator2) ? $separator2 : $separator;
        } elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)($break|\n+|\z)/xmgc) {
            # Over-long word: let it run past the line width.
            $r .= $unexpand
                ? unexpand($nl . $lead . $1)
                : $nl . $lead . $1;
            $remainder = $2;
        } elsif ($huge eq 'die') {
            die "couldn't wrap '$t'";
        } elsif ($columns < 2) {
            warnings::warnif "Increasing \$Text::Wrap::columns from $columns to 2";
            $columns = 2;
            # NOTE(review): this returns the arguments (with the last text
            # piece already popped) rather than wrapped text -- confirm
            # whether a retry of wrap() was intended.
            return ($ip, $xp, @t);
        } else {
            die "This shouldn't happen";
        }

        # Everything after the first line uses the subsequent prefix/width.
        $lead = $xp;
        $ll = $nll;
        $nl = defined($separator2)
            ? ($remainder eq "\n"
                ? "\n"
                : $separator2)
            : $separator;
    }
    $r .= $remainder;

    print "-----------$r---------\n" if $debug;

    print "Finish up with '$lead'\n" if $debug;

    # Append whatever the final match left unconsumed.
    $r .= $lead . substr($t, pos($t), length($t)-pos($t))
        if pos($t) ne length($t);

    print "-----------$r---------\n" if $debug;

    return $r;
}
# fill($ip, $xp, @raw)
#
# Joins @raw with newlines, splits the result into paragraphs wherever a
# newline is followed by whitespace, collapses each whitespace run inside a
# paragraph to one space, and wraps every paragraph with wrap($ip, $xp, ...).
#
# Returns the wrapped paragraphs joined into one string.
sub fill
{
    my ($ip, $xp, @raw) = @_;
    my @wrapped;

    foreach my $chunk (split(/\n\s+/, join("\n", @raw))) {
        $chunk =~ s/\s+/ /g;
        my $text = wrap($ip, $xp, $chunk);
        push(@wrapped, $text);
    }

    # if paragraph_indent is the same as line_indent,
    # separate paragraphs with blank lines
    my $glue = "\n";
    $glue = "\n\n" if $ip eq $xp;

    return join($glue, @wrapped);
}
1;
__END__
package Thread::Queue;
use strict;
use warnings;
our $VERSION = '2.11';
use threads::shared 1.21;
use Scalar::Util 1.10 qw(looks_like_number blessed reftype refaddr);
# Carp errors from threads::shared calls should complain about caller
our @CARP_NOT = ("threads::shared");
# Predeclarations for internal functions
my ($validate_count, $validate_index);
# Constructor.  Builds a shared array holding a shared_clone() of every
# argument after the class name and blesses a reference to it into that
# class.  With no extra arguments the queue starts empty.
sub new
{
    my ($class, @initial) = @_;
    my @items :shared = map { shared_clone($_) } @initial;
    return bless(\@items, $class);
}
# Appends a shared_clone() of every argument to the tail of the queue while
# holding the queue lock, then signals the queue's condition variable if the
# queue is non-empty afterwards.
sub enqueue
{
    my ($queue, @new_items) = @_;
    lock(@$queue);

    my @shared = map { shared_clone($_) } @new_items;
    push(@$queue, @shared) and cond_signal(@$queue);
}
# Reports how many items are currently on the queue; the count is taken
# while holding the queue lock.
sub pending
{
    my ($queue) = @_;
    lock(@$queue);

    my $size = @$queue;
    return $size;
}
# Removes items from the head of the queue, waiting on the queue's condition
# variable until enough items are present.
#
# An optional count (checked by $validate_count) says how many to take; the
# default is 1.  If items remain afterwards the condition is signalled again
# so another waiter can proceed.
#
# Returns a single item when the count is 1, otherwise the list of items.
sub dequeue
{
    my ($queue, @args) = @_;
    lock(@$queue);

    my $count = 1;
    $count = $validate_count->($args[0]) if @args;

    # Block until the queue can satisfy the request.
    until (@$queue >= $count) {
        cond_wait(@$queue);
    }
    if (@$queue > $count) {
        cond_signal(@$queue);
    }

    if ($count == 1) {
        return shift(@$queue);
    }

    my @items = map { shift(@$queue) } 1 .. $count;
    return @items;
}
# Removes items from the head of the queue without waiting.
#
# An optional count (checked by $validate_count) says how many to take; the
# default is 1.  With a count of 1 the head item is returned (undef when the
# queue is empty).  Otherwise as many items as are available, up to the
# count, are returned as a list.
sub dequeue_nb
{
    my ($queue, @args) = @_;
    lock(@$queue);

    my $count = @args ? $validate_count->($args[0]) : 1;

    return shift(@$queue) if $count == 1;

    my @items;
    while (@items < $count && @$queue) {
        push(@items, shift(@$queue));
    }
    return @items;
}
# Looks at an item without removing it.  An optional index (checked by
# $validate_index) selects the position; the default is 0, the head of the
# queue.  Returns the element stored at that index.
sub peek
{
    my ($queue, @args) = @_;
    lock(@$queue);

    my $index = 0;
    $index = $validate_index->($args[0]) if @args;

    return $queue->[$index];
}
# Inserts a shared_clone() of each given item into the queue so that the
# first new item ends up at the requested index.
#
# The index is required and checked by $validate_index.  A negative index
# counts back from the tail and is clamped to 0 if it reaches past the head.
# Nothing happens when no items are supplied.  After inserting, the queue's
# condition variable is signalled.
sub insert
{
    my ($queue, $raw_index, @new_items) = @_;
    lock(@$queue);

    my $index = $validate_index->($raw_index);

    return unless @new_items;    # Nothing to insert

    # Support negative indices
    if ($index < 0) {
        $index += @$queue;
        $index = 0 if $index < 0;
    }

    # Peel off everything from $index onward, preserving order
    my @displaced;
    unshift(@displaced, pop(@$queue)) while (@$queue > $index);

    # New items first, then the displaced ones go back on behind them
    push(@$queue, map { shared_clone($_) } @new_items);
    push(@$queue, @displaced);

    # Soup's up
    cond_signal(@$queue);
}
# Removes items from an arbitrary position in the queue without waiting.
#
# Optional arguments are an index (default 0, checked by $validate_index)
# and a count (default 1, checked by $validate_count).  A negative index
# counts back from the tail; if it reaches past the head, the count is
# reduced by the overshoot and the remaining items are taken from the head
# via dequeue_nb (nothing is returned if the reduced count is not positive).
#
# Returns a single item when the count is 1, otherwise the list of items
# that were removed.
sub extract
{
    my ($queue, @args) = @_;
    lock(@$queue);

    my $index = @args ? $validate_index->(shift @args) : 0;
    my $count = @args ? $validate_count->(shift @args) : 1;

    # Support negative indices
    if ($index < 0) {
        $index += @$queue;
        if ($index < 0) {
            $count += $index;
            return if ($count <= 0);            # Beyond the head of the queue
            return $queue->dequeue_nb($count);  # Extract from the head
        }
    }

    # Set aside the items that follow the requested range
    my $range_end = $index + $count;
    my @kept;
    unshift(@kept, pop(@$queue)) while (@$queue > $range_end);

    # Pull out the requested range itself
    my @taken;
    unshift(@taken, pop(@$queue)) while (@$queue > $index);

    # Put the set-aside items back
    push(@$queue, @kept);

    if ($count == 1) {
        return $taken[0];
    }
    return @taken;
}
### Internal Functions ###
# Validates an 'index' argument: it must be defined, look like a number and
# have no fractional part.  Returns the index unchanged when valid; otherwise
# croaks, naming the calling method with its 'Thread::Queue::' prefix removed.
$validate_index = sub {
    my ($index) = @_;

    my $is_integer = defined($index)
                  && looks_like_number($index)
                  && (int($index) == $index);
    return $index if $is_integer;

    require Carp;
    my ($method) = (caller(1))[3];
    $method =~ s/Thread::Queue:://;
    $index = 'undef' if (! defined($index));
    Carp::croak("Invalid 'index' argument ($index) to '$method' method");
};
# Validates a 'count' argument: it must be defined, look like a number, have
# no fractional part and be at least 1.  Returns the count unchanged when
# valid; otherwise croaks, naming the calling method with its
# 'Thread::Queue::' prefix removed.
$validate_count = sub {
    my ($count) = @_;

    my $is_positive_integer = defined($count)
                           && looks_like_number($count)
                           && (int($count) == $count)
                           && ($count >= 1);
    return $count if $is_positive_integer;

    require Carp;
    my ($method) = (caller(1))[3];
    $method =~ s/Thread::Queue:://;
    $count = 'undef' if (! defined($count));
    Carp::croak("Invalid 'count' argument ($count) to '$method' method");
};
1;
=head1 NAME
Thread::Queue - Thread-safe queues
=head1 VERSION
This document describes Thread::Queue version 2.11
=head1 SYNOPSIS
use strict;
use warnings;
use threads;
use Thread::Queue;
my $q = Thread::Queue->new(); # A new empty queue
# Worker thread
my $thr = threads->create(sub {
while (my $item = $q->dequeue()) {
# Do work on $item
}
})->detach();
# Send work to the thread
$q->enqueue($item1, ...);
# Count of items in the queue
my $left = $q->pending();
# Non-blocking dequeue
if (defined(my $item = $q->dequeue_nb())) {
# Work on $item
}
# Get the second item in the queue without dequeuing anything
my $item = $q->peek(1);
# Insert two items into the queue just behind the head
$q->insert(1, $item1, $item2);
# Extract the last two items on the queue
my ($item1, $item2) = $q->extract(-2, 2);
=head1 DESCRIPTION
This module provides thread-safe FIFO queues that can be accessed safely by
any number of threads.
Any data types supported by L<threads::shared> can be passed via queues:
=over
=item Ordinary scalars
=item Array refs
=item Hash refs
=item Scalar refs
=item Objects based on the above
=back
Ordinary scalars are added to queues as they are.
If not already thread-shared, the other complex data types will be cloned
(recursively, if needed, and including any C<bless>ings and read-only
settings) into thread-shared structures before being placed onto a queue.
For example, the following would cause L<Thread::Queue> to create an empty,
shared array reference via C<&shared([])>, copy the elements 'foo', 'bar'
and 'baz' from C<@ary> into it, and then place that shared reference onto
the queue:
my @ary = qw/foo bar baz/;
$q->enqueue(\@ary);
However, for the following, the items are already shared, so their references
are added directly to the queue, and no cloning takes place:
my @ary :shared = qw/foo bar baz/;
$q->enqueue(\@ary);
my $obj = &shared({});
$$obj{'foo'} = 'bar';
$$obj{'qux'} = 99;
bless($obj, 'My::Class');
$q->enqueue($obj);
See L</"LIMITATIONS"> for caveats related to passing objects via queues.
=head1 QUEUE CREATION
=over
=item ->new()
Creates a new empty queue.
=item ->new(LIST)
Creates a new queue pre-populated with the provided list of items.
=back
=head1 BASIC METHODS
The following methods deal with queues on a FIFO basis.
=over
=item ->enqueue(LIST)
Adds a list of items onto the end of the queue.
=item ->dequeue()
=item ->dequeue(COUNT)
Removes the requested number of items (default is 1) from the head of the
queue, and returns them. If the queue contains fewer than the requested
number of items, then the thread will be blocked until the requisite number
of items are available (i.e., until other threads C<enqueue> more items).
=item ->dequeue_nb()
=item ->dequeue_nb(COUNT)
Removes the requested number of items (default is 1) from the head of the
queue, and returns them. If the queue contains fewer than the requested
number of items, then it immediately (i.e., non-blocking) returns whatever
items there are on the queue. If the queue is empty, then C<undef> is
returned.
=item ->pending()
Returns the number of items still in the queue.
=back
=head1 ADVANCED METHODS
The following methods can be used to manipulate items anywhere in a queue.
To prevent the contents of a queue from being modified by another thread
while it is being examined and/or changed, L<lock|threads::shared/"lock
VARIABLE"> the queue inside a local block:
{
lock($q); # Keep other threads from changing the queue's contents
my $item = $q->peek();
if ($item ...) {
...
}
}
# Queue is now unlocked
=over
=item ->peek()
=item ->peek(INDEX)
Returns an item from the queue without dequeuing anything. Defaults to the
head of the queue (at index position 0) if no index is specified. Negative
index values are supported as with L<arrays|perldata/"Subscripts"> (i.e., -1
is the end of the queue, -2 is next to last, and so on).
If no item exists at the specified index (i.e., the queue is empty, or the
index is beyond the number of items on the queue), then C<undef> is returned.
Remember, the returned item is not removed from the queue, so manipulating a
C<peek>ed at reference affects the item on the queue.
=item ->insert(INDEX, LIST)
Adds the list of items to the queue at the specified index position (0
is the head of the list). Any existing items at and beyond that position are
pushed back past the newly added items:
$q->enqueue(1, 2, 3, 4);
$q->insert(1, qw/foo bar/);
# Queue now contains: 1, foo, bar, 2, 3, 4
Specifying an index position greater than the number of items in the queue
just adds the list to the end.
Negative index positions are supported:
$q->enqueue(1, 2, 3, 4);
$q->insert(-2, qw/foo bar/);
# Queue now contains: 1, 2, foo, bar, 3, 4
Specifying a negative index position greater than the number of items in the
queue adds the list to the head of the queue.
=item ->extract()
=item ->extract(INDEX)
=item ->extract(INDEX, COUNT)
Removes and returns the specified number of items (defaults to 1) from the
specified index position in the queue (0 is the head of the queue). When
called with no arguments, C<extract> operates the same as C<dequeue_nb>.
This method is non-blocking, and will return only as many items as are
available to fulfill the request:
$q->enqueue(1, 2, 3, 4);
my $item = $q->extract(2); # Returns 3
# Queue now contains: 1, 2, 4
my @items = $q->extract(1, 3); # Returns (2, 4)
# Queue now contains: 1
Specifying an index position greater than the number of items in the
queue results in C<undef> or an empty list being returned.
$q->enqueue('foo');
my $nada = $q->extract(3) # Returns undef
my @nada = $q->extract(1, 3) # Returns ()
Negative index positions are supported. Specifying a negative index position
greater than the number of items in the queue may return items from the head
of the queue (similar to C<dequeue_nb>) if the count overlaps the head of the
queue from the specified position (i.e. if queue size + index + count is
greater than zero):
$q->enqueue(qw/foo bar baz/);
my @nada = $q->extract(-6, 2); # Returns () - (3+(-6)+2) <= 0
my @some = $q->extract(-6, 4); # Returns (foo) - (3+(-6)+4) > 0
# Queue now contains: bar, baz
my @rest = $q->extract(-3, 4); # Returns (bar, baz) - (2+(-3)+4) > 0
=back
=head1 NOTES
Queues created by L<Thread::Queue> can be used in both threaded and
non-threaded applications.
=head1 LIMITATIONS
Passing objects on queues may not work if the objects' classes do not support
sharing. See L<threads::shared/"BUGS AND LIMITATIONS"> for more.
Passing array/hash refs that contain objects may not work for Perl prior to
5.10.0.
=head1 SEE ALSO
Thread::Queue Discussion Forum on CPAN:
L<http://www.cpanforum.com/dist/Thread-Queue>
Annotated POD for Thread::Queue:
L<http://annocpan.org/~JDHEDDEN/Thread-Queue-2.11/lib/Thread/Queue.pm>
Source repository:
L<http://code.google.com/p/thread-queue/>
L<threads>, L<threads::shared>
=head1 MAINTAINER
Jerry D. Hedden, S<E<lt>jdhedden AT cpan DOT orgE<gt>>
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
=cut
package Tie::Hash;

our $VERSION = '1.03';

use Carp;
use warnings::register;

# Legacy constructor name: simply forwards to TIEHASH on the invoking class.
sub new {
    my $pkg = shift;
    $pkg->TIEHASH(@_);
}

# Grandfather "new"
#
# Fallback TIEHASH for subclasses that only define their own new():
# warn (if the caller has this warning category enabled) and delegate to
# that new().  Croaks when the invoking class has no new() of its own.
#
# The base class itself is excluded from the delegation: Tie::Hash::new
# calls TIEHASH, so delegating back to it would recurse without end.
sub TIEHASH {
    my $pkg = shift;
    if ($pkg ne __PACKAGE__ && defined &{"${pkg}::new"}) {
        warnings::warnif("WARNING: calling ${pkg}->new since ${pkg}->TIEHASH is missing");
        $pkg->new(@_);
    }
    else {
        croak "$pkg doesn't define a TIEHASH method";
    }
}

# Stub: subclasses must provide their own EXISTS.
sub EXISTS {
    my $pkg = ref $_[0];
    croak "$pkg doesn't define an EXISTS method";
}

# Generic CLEAR built on FIRSTKEY / NEXTKEY / DELETE.  All keys are
# collected first so that deleting does not disturb the iteration.
sub CLEAR {
    my $self = shift;
    my $key = $self->FIRSTKEY(@_);
    my @keys;

    while (defined $key) {
        push @keys, $key;
        $key = $self->NEXTKEY(@_, $key);
    }
    foreach $key (@keys) {
        $self->DELETE(@_, $key);
    }
}
# Tie::StdHash reproduces the behaviour of an ordinary Perl hash: the tied
# object is itself a blessed hash reference holding the data.  It is meant
# as a base class for classes that want to override only some operations.
package Tie::StdHash;
# @ISA = qw(Tie::Hash); # would inherit new() only

# Create the tied object: an empty blessed hash.
sub TIEHASH {
    my ($class) = @_;
    bless {}, $class;
}

# Store VALUE under KEY.
sub STORE {
    my ($self, $key, $value) = @_;
    $self->{$key} = $value;
}

# Return the value stored under KEY.
sub FETCH {
    my ($self, $key) = @_;
    $self->{$key};
}

# Start a new iteration: calling keys() resets the hash's internal iterator.
sub FIRSTKEY {
    my ($self) = @_;
    my $reset = scalar keys %{$self};
    each %{$self};
}

# Continue the iteration started by FIRSTKEY.
sub NEXTKEY {
    my ($self) = @_;
    each %{$self};
}

# True if KEY is present.
sub EXISTS {
    my ($self, $key) = @_;
    exists $self->{$key};
}

# Remove KEY, returning its value.
sub DELETE {
    my ($self, $key) = @_;
    delete $self->{$key};
}

# Remove every entry.
sub CLEAR {
    my ($self) = @_;
    %{$self} = ();
}

# Value of the hash in scalar context.
sub SCALAR {
    my ($self) = @_;
    scalar %{$self};
}
# Tie::ExtraHash: like Tie::StdHash, but the tied object is a blessed array
# whose element 0 is the data hash; any extra arguments given to tie() are
# kept in the following elements.
package Tie::ExtraHash;

# Create the tied object: [ {data}, extra tie() arguments... ].
sub TIEHASH {
    my ($class, @extra) = @_;
    bless [{}, @extra], $class;
}

# Store VALUE under KEY.
sub STORE {
    my ($self, $key, $value) = @_;
    $self->[0]{$key} = $value;
}

# Return the value stored under KEY.
sub FETCH {
    my ($self, $key) = @_;
    $self->[0]{$key};
}

# Start a new iteration: calling keys() resets the hash's internal iterator.
sub FIRSTKEY {
    my ($self) = @_;
    my $reset = scalar keys %{ $self->[0] };
    each %{ $self->[0] };
}

# Continue the iteration started by FIRSTKEY.
sub NEXTKEY {
    my ($self) = @_;
    each %{ $self->[0] };
}

# True if KEY is present.
sub EXISTS {
    my ($self, $key) = @_;
    exists $self->[0]->{$key};
}

# Remove KEY, returning its value.
sub DELETE {
    my ($self, $key) = @_;
    delete $self->[0]->{$key};
}

# Remove every entry (the extra elements are left alone).
sub CLEAR {
    my ($self) = @_;
    %{ $self->[0] } = ();
}

# Value of the data hash in scalar context.
sub SCALAR {
    my ($self) = @_;
    scalar %{ $self->[0] };
}
1;
package Thread::Semaphore;

use strict;
use warnings;

our $VERSION = '2.09';

use threads::shared;
use Scalar::Util 1.10 qw(looks_like_number);

# Constructor: ->new() or ->new(COUNT).
# The object is a blessed reference to a shared scalar holding the count
# (default 1).  Croaks unless COUNT is an integer-valued number.
sub new {
    my $class = shift;
    my $val :shared = @_ ? shift : 1;

    unless (defined($val) &&
            looks_like_number($val) &&
            (int($val) == $val))
    {
        require Carp;
        $val = 'undef' unless defined($val);
        Carp::croak("Semaphore initializer is not an integer: $val");
    }

    return bless(\$val, $class);
}

# ->down() or ->down(AMOUNT): wait until the count is at least AMOUNT
# (default 1), then subtract it.  Croaks unless AMOUNT is an integer >= 1.
sub down {
    my $sema = shift;
    lock($$sema);

    my $dec = @_ ? shift : 1;
    unless (defined($dec) &&
            looks_like_number($dec) &&
            (int($dec) == $dec) &&
            ($dec >= 1))
    {
        require Carp;
        $dec = 'undef' unless defined($dec);
        Carp::croak("Semaphore decrement is not a positive integer: $dec");
    }

    # Sleep on the count until enough is available
    cond_wait($$sema) until ($$sema >= $dec);
    $$sema -= $dec;
}

# ->up() or ->up(AMOUNT): add AMOUNT (default 1) to the count and, if the
# count is then positive, wake every thread waiting on it.
# Croaks unless AMOUNT is an integer >= 1.
sub up {
    my $sema = shift;
    lock($$sema);

    my $inc = @_ ? shift : 1;
    unless (defined($inc) &&
            looks_like_number($inc) &&
            (int($inc) == $inc) &&
            ($inc >= 1))
    {
        require Carp;
        $inc = 'undef' unless defined($inc);
        Carp::croak("Semaphore increment is not a positive integer: $inc");
    }

    $$sema += $inc;
    cond_broadcast($$sema) if ($$sema > 0);
}
1;
=head1 NAME
Thread::Semaphore - Thread-safe semaphores
=head1 VERSION
This document describes Thread::Semaphore version 2.09
=head1 SYNOPSIS
use Thread::Semaphore;
my $s = Thread::Semaphore->new();
$s->down(); # Also known as the semaphore P operation.
# The guarded section is here
$s->up(); # Also known as the semaphore V operation.
# The default semaphore value is 1
my $s = Thread::Semaphore->new($initial_value);
$s->down($down_value);
$s->up($up_value);
=head1 DESCRIPTION
Semaphores provide a mechanism to regulate access to resources. Unlike
locks, semaphores aren't tied to particular scalars, and so may be used to
control access to anything you care to use them for.
Semaphores don't limit their values to zero and one, so they can be used to
control access to some resource that there may be more than one of (e.g.,
filehandles). Increment and decrement amounts aren't fixed at one either,
so threads can reserve or return multiple resources at once.
=head1 METHODS
=over 8
=item ->new()
=item ->new(NUMBER)
C<new> creates a new semaphore, and initializes its count to the specified
number (which must be an integer). If no number is specified, the
semaphore's count defaults to 1.
=item ->down()
=item ->down(NUMBER)
The C<down> method decreases the semaphore's count by the specified number
(which must be an integer >= 1), or by one if no number is specified.
If the semaphore's count would drop below zero, this method will block
until such time as the semaphore's count is greater than or equal to the
amount you're C<down>ing the semaphore's count by.
This is the semaphore "P operation" (the name derives from the Dutch
word "pak", which means "capture" -- the semaphore operations were
named by the late Dijkstra, who was Dutch).
=item ->up()
=item ->up(NUMBER)
The C<up> method increases the semaphore's count by the number specified
(which must be an integer >= 1), or by one if no number is specified.
This will unblock any thread that is blocked trying to C<down> the
semaphore if the C<up> raises the semaphore's count to at least the amount
that the C<down> is trying to decrement it by. For example, if three threads
are blocked trying to C<down> a semaphore by one, and another thread C<up>s
the semaphore by two, then two of the blocked threads (which two is
indeterminate) will become unblocked.
This is the semaphore "V operation" (the name derives from the Dutch
word "vrij", which means "release").
=back
=head1 NOTES
Semaphores created by L<Thread::Semaphore> can be used in both threaded and
non-threaded applications. This allows you to write modules and packages
that potentially make use of semaphores, and that will function in either
environment.
=head1 SEE ALSO
Thread::Semaphore Discussion Forum on CPAN:
L<http://www.cpanforum.com/dist/Thread-Semaphore>
Annotated POD for Thread::Semaphore:
L<http://annocpan.org/~JDHEDDEN/Thread-Semaphore-2.09/lib/Thread/Semaphore.pm>
Source repository:
L<http://code.google.com/p/thread-semaphore/>
L<threads>, L<threads::shared>
=head1 MAINTAINER
Jerry D. Hedden, S<E<lt>jdhedden AT cpan DOT orgE<gt>>
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
=cut
package Tie::Array;

use 5.006_001;
use strict;
use Carp;
our $VERSION = '1.03';

# Pod documentation after __END__ below.

# Default hooks that do nothing; subclasses override them when needed.
sub DESTROY { }
sub EXTEND  { }

# The list operations below are all expressed through the primitive
# FETCH / STORE / FETCHSIZE / STORESIZE methods supplied by the subclass.
sub UNSHIFT { scalar shift->SPLICE(0,0,@_) }
sub SHIFT   { shift->SPLICE(0,1) }
sub CLEAR   { shift->STORESIZE(0) }

# Append each remaining argument after the current last element.
sub PUSH
{
    my $obj = shift;
    my $i   = $obj->FETCHSIZE;
    $obj->STORE($i++, shift) while (@_);
}

# Remove and return the last element (undef when the array is empty).
sub POP
{
    my $obj     = shift;
    my $newsize = $obj->FETCHSIZE - 1;
    my $val;
    if ($newsize >= 0)
    {
        $val = $obj->FETCH($newsize);
        $obj->STORESIZE($newsize);
    }
    $val;
}

# SPLICE this, offset, length, LIST
#
# Generic splice.  Offset defaults to 0 and a negative offset counts back
# from the end; length defaults to "the rest of the array" and a negative
# length leaves that many elements at the end.  Returns the removed
# elements in list context, or the last removed element in scalar context.
sub SPLICE {
    my $obj = shift;
    my $sz  = $obj->FETCHSIZE;
    my $off = (@_) ? shift : 0;
    $off += $sz if ($off < 0);
    my $len = (@_) ? shift : $sz - $off;
    $len += $sz - $off if $len < 0;

    # Clamp the range to the array BEFORE reading anything, so that the
    # returned list holds only elements that really exist (as the builtin
    # splice does) and a still-negative length removes nothing.
    $off = $sz        if $off > $sz;
    $len = 0          if $len < 0;
    $len = $sz - $off if $off + $len > $sz;

    my @result;
    for (my $i = 0; $i < $len; $i++) {
        push(@result,$obj->FETCH($off+$i));
    }

    if (@_ > $len) {
        # Move items up to make room
        my $d = @_ - $len;
        my $e = $off+$len;
        $obj->EXTEND($sz+$d);
        for (my $i=$sz-1; $i >= $e; $i--) {
            my $val = $obj->FETCH($i);
            $obj->STORE($i+$d,$val);
        }
    }
    elsif (@_ < $len) {
        # Move items down to close the gap
        my $d = $len - @_;
        for (my $i=$off+$len; $i < $sz; $i++) {
            my $val = $obj->FETCH($i);
            $obj->STORE($i-$d,$val);
        }
        $obj->STORESIZE($sz-$d);
    }

    # Write the replacement list into place
    for (my $i=0; $i < @_; $i++) {
        $obj->STORE($off+$i,$_[$i]);
    }
    return wantarray ? @result : pop @result;
}

# Stub: subclasses must provide EXISTS if exists() is to work.
sub EXISTS {
    my $pkg = ref $_[0];
    croak "$pkg doesn't define an EXISTS method";
}

# Stub: subclasses must provide DELETE if delete() is to work.
sub DELETE {
    my $pkg = ref $_[0];
    croak "$pkg doesn't define a DELETE method";
}
# Tie::StdArray: a tied-array class whose object is a blessed reference to
# an ordinary array, with every operation mapped onto the matching builtin.
package Tie::StdArray;
use vars qw(@ISA);
@ISA = 'Tie::Array';

# Create the tied object: an empty blessed array.
sub TIEARRAY {
    my ($class) = @_;
    bless [], $class;
}

# Number of elements.
sub FETCHSIZE {
    my ($self) = @_;
    scalar @{$self};
}

# Resize to COUNT elements.
sub STORESIZE {
    my ($self, $count) = @_;
    $#{$self} = $count - 1;
}

# Store VALUE at INDEX.
sub STORE {
    my ($self, $index, $value) = @_;
    $self->[$index] = $value;
}

# Return the element at INDEX.
sub FETCH {
    my ($self, $index) = @_;
    $self->[$index];
}

# Remove every element.
sub CLEAR {
    my ($self) = @_;
    @{$self} = ();
}

# Remove and return the last element.
sub POP {
    my ($self) = @_;
    pop(@{$self});
}

# Append LIST.
sub PUSH {
    my ($self, @items) = @_;
    push(@{$self}, @items);
}

# Remove and return the first element.
sub SHIFT {
    my ($self) = @_;
    shift(@{$self});
}

# Prepend LIST.
sub UNSHIFT {
    my ($self, @items) = @_;
    unshift(@{$self}, @items);
}

# True if the element at INDEX exists.
sub EXISTS {
    my ($self, $index) = @_;
    exists $self->[$index];
}

# Delete the element at INDEX.
sub DELETE {
    my ($self, $index) = @_;
    delete $self->[$index];
}

# SPLICE this, offset, length, LIST
# Fills in the default offset (0) and length (rest of the array), turns a
# negative offset into one counted from the end, then hands over to the
# builtin splice.
sub SPLICE
{
    my $self   = shift;
    my $size   = $self->FETCHSIZE;
    my $offset = @_ ? shift : 0;
    $offset += $size if $offset < 0;
    my $length = @_ ? shift : $size - $offset;
    return splice(@{$self}, $offset, $length, @_);
}
1;
__END__
=head1 NAME
Tie::Array - base class for tied arrays
=head1 SYNOPSIS
package Tie::NewArray;
use Tie::Array;
@ISA = ('Tie::Array');
# mandatory methods
sub TIEARRAY { ... }
sub FETCH { ... }
sub FETCHSIZE { ... }
sub STORE { ... } # mandatory if elements writeable
sub STORESIZE { ... } # mandatory if elements can be added/deleted
sub EXISTS { ... } # mandatory if exists() expected to work
sub DELETE { ... } # mandatory if delete() expected to work
# optional methods - for efficiency
sub CLEAR { ... }
sub PUSH { ... }
sub POP { ... }
sub SHIFT { ... }
sub UNSHIFT { ... }
sub SPLICE { ... }
sub EXTEND { ... }
sub DESTROY { ... }
package Tie::NewStdArray;
use Tie::Array;
@ISA = ('Tie::StdArray');
# all methods provided by default
package main;
$object = tie @somearray,Tie::NewArray;
$object = tie @somearray,Tie::StdArray;
$object = tie @somearray,Tie::NewStdArray;
=head1 DESCRIPTION
This module provides methods for array-tying classes. See
L<perltie> for a list of the functions required in order to tie an array
to a package. The basic B<Tie::Array> package provides stub C<DESTROY>,
and C<EXTEND> methods that do nothing, stub C<DELETE> and C<EXISTS>
methods that croak() if the delete() or exists() builtins are ever called
on the tied array, and implementations of C<PUSH>, C<POP>, C<SHIFT>,
C<UNSHIFT>, C<SPLICE> and C<CLEAR> in terms of basic C<FETCH>, C<STORE>,
C<FETCHSIZE>, C<STORESIZE>.
The B<Tie::StdArray> package provides efficient methods required for tied arrays
which are implemented as blessed references to an "inner" perl array.
It inherits from B<Tie::Array>, and should cause tied arrays to behave exactly
like standard arrays, allowing for selective overloading of methods.
For developers wishing to write their own tied arrays, the required methods
are briefly defined below. See L<perltie> for more detailed
descriptions, as well as example code:
=over 4
=item TIEARRAY classname, LIST
The class method is invoked by the command C<tie @array, classname>. Associates
an array instance with the specified class. C<LIST> would represent
additional arguments (along the lines of L<AnyDBM_File> and compatriots) needed
to complete the association. The method should return an object of a class which
provides the methods below.
=item STORE this, index, value
Store datum I<value> into I<index> for the tied array associated with
object I<this>. If this makes the array larger, then the
class's mapping of C<undef> should be returned for new positions.
=item FETCH this, index
Retrieve the datum in I<index> for the tied array associated with
object I<this>.
=item FETCHSIZE this
Returns the total number of items in the tied array associated with
object I<this>. (Equivalent to C<scalar(@array)>).
=item STORESIZE this, count
Sets the total number of items in the tied array associated with
object I<this> to be I<count>. If this makes the array larger, then the
class's mapping of C<undef> should be returned for new positions.
If the array becomes smaller, then entries beyond I<count> should be
deleted.
=item EXTEND this, count
Informative call that array is likely to grow to have I<count> entries.
Can be used to optimize allocation. This method need do nothing.
=item EXISTS this, key
Verify that the element at index I<key> exists in the tied array I<this>.
The B<Tie::Array> implementation is a stub that simply croaks.
=item DELETE this, key
Delete the element at index I<key> from the tied array I<this>.
The B<Tie::Array> implementation is a stub that simply croaks.
=item CLEAR this
Clear (remove, delete, ...) all values from the tied array associated with
object I<this>.
=item DESTROY this
Normal object destructor method.
=item PUSH this, LIST
Append elements of LIST to the array.
=item POP this
Remove last element of the array and return it.
=item SHIFT this
Remove the first element of the array (shifting other elements down)
and return it.
=item UNSHIFT this, LIST
Insert LIST elements at the beginning of the array, moving existing elements
up to make room.
=item SPLICE this, offset, length, LIST
Perform the equivalent of C<splice> on the array.
I<offset> is optional and defaults to zero, negative values count back
from the end of the array.
I<length> is optional and defaults to rest of the array.
I<LIST> may be empty.
Returns a list of the original I<length> elements at I<offset>.
=back
=head1 CAVEATS
There is no support at present for tied @ISA. There is a potential conflict
between magic entries needed to notice setting of @ISA, and those needed to
implement 'tie'.
Very little consideration has been given to the behaviour of tied arrays
when C<$[> is not the default value of zero.
=head1 AUTHOR
Nick Ing-Simmons E<lt>nik@tiuk.ti.comE<gt>
=cut

# Tie::File: package header, imports and file-scoped defaults used by the
# methods below.
package Tie::File;
require 5.005;
use Carp ':DEFAULT', 'confess';
use POSIX 'SEEK_SET';
use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'LOCK_SH', 'O_WRONLY', 'O_RDONLY';
# Bit mask covering the three access-mode flags; TIEARRAY uses it to tell
# whether the requested mode is read-only.
sub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY }
$VERSION = "0.97_02";
# Defaults applied by TIEARRAY when the matching option is not supplied
my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
# Option names accepted by TIEARRAY; each is valid with or without a
# leading '-'.
my %good_opt = map {$_ => 1, "-$_" => 1}
qw(memory dw_size mode recsep discipline
autodefer autochomp autodefer_threshhold concurrent);
# TIEARRAY class, filename-or-filehandle, option => value, ...
#
# Constructor invoked by tie().  Validates the options, fills in defaults,
# opens (or adopts) the file and returns the option hash itself blessed
# into the class.
#
# Croaks on: an odd-length argument list, an unrecognised option, a true
# 'concurrent' option, dw_size larger than memory, an empty record
# separator, a non-seekable filehandle, a reference that is not a GLOB, or
# an unknown I/O discipline.  Returns nothing if sysopen fails.
sub TIEARRAY {
if (@_ % 2 != 0) {
croak "usage: tie \@array, $_[0], filename, [option => value]...";
}
my ($pack, $file, %opts) = @_;
# transform '-foo' keys into 'foo' keys
for my $key (keys %opts) {
unless ($good_opt{$key}) {
croak("$pack: Unrecognized option '$key'\n");
}
my $okey = $key;
if ($key =~ s/^-+//) {
$opts{$key} = delete $opts{$okey};
}
}
if ($opts{concurrent}) {
croak("$pack: concurrent access not supported yet\n");
}
unless (defined $opts{memory}) {
# default is the larger of the default cache size and the
# deferred-write buffer size (if specified)
$opts{memory} = $DEFAULT_MEMORY_SIZE;
$opts{memory} = $opts{dw_size}
if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
# Dora Winifred Read
}
# The deferred-write buffer defaults to the whole memory allowance and may
# never exceed it
$opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
if ($opts{dw_size} > $opts{memory}) {
croak("$pack: dw_size may not be larger than total memory allocation\n");
}
# are we in deferred-write mode?
$opts{defer} = 0 unless defined $opts{defer};
$opts{deferred} = {}; # no records are presently deferred
$opts{deferred_s} = 0; # count of total bytes in ->{deferred}
$opts{deferred_max} = -1; # empty
# What's a good way to arrange that this class can be overridden?
$opts{cache} = Tie::File::Cache->new($opts{memory});
# autodeferment is enabled by default
$opts{autodefer} = 1 unless defined $opts{autodefer};
$opts{autodeferring} = 0; # but is not initially active
$opts{ad_history} = [];
$opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
unless defined $opts{autodefer_threshhold};
$opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
unless defined $opts{autodefer_filelen_threshhold};
# Offsets table starts with a single entry: offset 0
$opts{offsets} = [0];
$opts{filename} = $file;
unless (defined $opts{recsep}) {
$opts{recsep} = _default_recsep();
}
$opts{recseplen} = length($opts{recsep});
if ($opts{recseplen} == 0) {
croak "Empty record separator not supported by $pack";
}
$opts{autochomp} = 1 unless defined $opts{autochomp};
$opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode};
# Remember whether the access-mode bits of 'mode' say read-only
$opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
$opts{sawlastrec} = undef;
my $fh;
if (UNIVERSAL::isa($file, 'GLOB')) {
# We use 1 here on the theory that some systems
# may not indicate failure if we use 0.
# MSWin32 does not indicate failure with 0, but I don't know if
# it will indicate failure with 1 or not.
unless (seek $file, 1, SEEK_SET) {
croak "$pack: your filehandle does not appear to be seekable";
}
seek $file, 0, SEEK_SET; # put it back
$fh = $file; # setting binmode is the user's problem
} elsif (ref $file) {
croak "usage: tie \@array, $pack, filename, [option => value]...";
} else {
# $fh = \do { local *FH }; # XXX this is buggy
if ($] < 5.006) {
# perl 5.005 and earlier don't autovivify filehandles
require Symbol;
$fh = Symbol::gensym();
}
# Open failure is reported by returning nothing rather than croaking
sysopen $fh, $file, $opts{mode}, 0666 or return;
binmode $fh;
# Mark the handle as opened here; DESTROY closes only such handles
++$opts{ourfh};
}
{ my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
if (defined $opts{discipline} && $] >= 5.006) {
# This avoids a compile-time warning under 5.005
eval 'binmode($fh, $opts{discipline})';
croak $@ if $@ =~ /unknown discipline/i;
die if $@;
}
$opts{fh} = $fh;
# The option hash doubles as the object
bless \%opts => $pack;
}
# FETCH this, index
# Return record number $n, or undef if there is none.  A record waiting in
# the deferred-write buffer takes precedence over the one obtained from
# _fetch.  When autochomp is on, the trailing record separator
# (recseplen characters) is cut off the returned copy.
sub FETCH {
    my ($self, $n) = @_;

    # Look in the defer buffer first, then fall back to _fetch
    my $rec;
    if (exists $self->{deferred}{$n}) {
        $rec = $self->{deferred}{$n};
    }
    unless (defined $rec) {
        $rec = $self->_fetch($n);
    }

    # inlined _chomp1
    if (defined $rec && $self->{autochomp}) {
        substr($rec, - $self->{recseplen}) = "";
    }

    return $rec;
}
# Chomp many records in-place; return nothing useful
# Does nothing unless autochomp is on.  Each defined argument has its last
# recseplen characters removed; undefined arguments are skipped.  The
# arguments are modified through @_, so the caller's variables change.
sub _chomp {
    my $self = shift;
    return unless $self->{autochomp};

    for my $rec (@_) {
        next unless defined $rec;
        substr($rec, - $self->{recseplen}) = "";
    }
}
# Chomp one record; return modified record
# With autochomp off the record comes back unchanged.  With autochomp on,
# an undefined record yields nothing, and a defined one comes back without
# its last recseplen characters.  Works on a copy of the argument.
sub _chomp1 {
    my ($self, $rec) = @_;

    if ($self->{autochomp}) {
        return unless defined $rec;
        substr($rec, - $self->{recseplen}) = "";
    }

    return $rec;
}
# _fetch this, n
# Return record $n including its record separator, or nothing if there is
# no such record.  The record cache is consulted first; on a miss the
# record is read from the file and, unless a flush is in progress, added
# to the cache.
sub _fetch {
    my ($self, $n) = @_;

    # check the record cache
    { my $cached = $self->{cache}->lookup($n);
      return $cached if defined $cached;
    }

    if ($#{$self->{offsets}} < $n) {
        return if $self->{eof}; # request for record beyond end of file
        my $o = $self->_fill_offsets_to($n);
        # If it's still undefined, there is no such record, so return 'undef'
        return unless defined $o;
    }

    # (An unused local that read the wrongly-cased key {FH} used to be
    # declared here; the handle is stored under {fh}.)
    $self->_seek($n); # we can do this now that offsets is populated
    my $rec = $self->_read_record;

    # If we happen to have just read the first record, check to see if
    # the length of the record matches what 'tell' says. If not, Tie::File
    # won't work, and should drop dead.
    #
    # if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
    # if (defined $self->{discipline}) {
    # croak "I/O discipline $self->{discipline} not supported";
    # } else {
    # croak "File encoding not supported";
    # }
    # }

    $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
    $rec;
}
# STORE this, index, value
# Write $rec as record $n.  While deferring, the record only goes into the
# deferred-write buffer.  Otherwise the old record (or, past the end of the
# file, a freshly extended slot) is overwritten in the file and the offsets
# table and cache are brought up to date.
sub STORE {
    my ($self, $n, $rec) = @_;
    die "STORE called from _check_integrity!" if $DIAGNOSTIC;

    # NOTE(review): _fixrecs is defined elsewhere; presumably it makes the
    # record end with the record separator -- confirm.
    $self->_fixrecs($rec);

    if ($self->{autodefer}) {
        $self->_annotate_ad_history($n);
    }

    return $self->_store_deferred($n, $rec) if $self->_is_deferring;

    # We need this to decide whether the new record will fit
    # It incidentally populates the offsets table
    # Note we have to do this before we alter the cache
    # 20020324 Wait, but this DOES alter the cache. TODO BUG?
    my $oldrec = $self->_fetch($n);

    if (not defined $oldrec) {
        # We're storing a record beyond the end of the file
        $self->_extend_file_to($n+1);
        $oldrec = $self->{recsep};
    }
    # return if $oldrec eq $rec; # don't bother

    # (A local $len_diff used to be computed here but was never used.)
    # length($oldrec) here is not consistent with text mode TODO XXX BUG
    $self->_mtwrite($rec, $self->{offsets}[$n], length($oldrec));
    $self->_oadjust([$n, 1, $rec]);
    $self->{cache}->update($n, $rec);
}
# _store_deferred this, n, rec
# Put $rec into the deferred-write buffer as record $n, replacing any
# earlier deferred copy.  The buffer's byte count grows (and the cache
# limit shrinks) by the net change in size.  If the buffer then exceeds
# dw_size it is flushed; otherwise the cache is flushed if it is too full.
sub _store_deferred {
    my ($self, $n, $rec) = @_;

    # The cached copy of this record is no longer valid
    $self->{cache}->remove($n);

    my $previous = $self->{deferred}{$n};

    # Track the highest deferred record number, if one is being tracked
    my $highest = $self->{deferred_max};
    if (defined $highest && $n > $highest) {
        $self->{deferred_max} = $n;
    }

    $self->{deferred}{$n} = $rec;

    # Net change in buffered bytes
    my $delta = length($rec);
    if (defined $previous) {
        $delta -= length($previous);
    }
    $self->{deferred_s} += $delta;
    $self->{cache}->adj_limit(-$delta);

    if ($self->{deferred_s} > $self->{dw_size}) {
        $self->_flush;
    } elsif ($self->_cache_too_full) {
        $self->_cache_flush;
    }
}
# Remove a single record from the deferred-write buffer without writing it
# The record need not be present
# When a record is removed, the buffer's byte count drops (and the cache
# limit rises) by its length.  If it was the highest deferred record,
# deferred_max is reset to undef.
sub _delete_deferred {
    my ($self, $n) = @_;

    my $rec = delete $self->{deferred}{$n};
    return unless defined $rec;

    my $highest = $self->{deferred_max};
    undef $self->{deferred_max} if defined $highest && $n == $highest;

    my $freed = length $rec;
    $self->{deferred_s} -= $freed;
    $self->{cache}->adj_limit($freed);
}
# FETCHSIZE this
# Return the number of records in the array: the count derived from the
# offsets table (taken as-is once eof is set, otherwise obtained from
# _fill_offsets), raised if the deferred-write buffer holds a record
# beyond that.
sub FETCHSIZE {
    my ($self) = @_;

    my $count;
    if ($self->{eof}) {
        $count = $#{$self->{offsets}};
    } else {
        $count = $self->_fill_offsets;
    }

    # A deferred record past the end makes the array that much longer
    my $top_deferred = $self->_defer_max;
    if (defined $top_deferred && $count < $top_deferred+1) {
        $count = $top_deferred+1;
    }

    return $count;
}
# STORESIZE this, count
# Make the array exactly $len records long.  Growing either buffers record
# separators for the new slots (when deferring) or extends the file.
# Shrinking discards deferred records at or past $len, chops the file at
# record $len, and trims the offsets table and the cache.
sub STORESIZE {
    my ($self, $len) = @_;

    $self->_annotate_ad_history('STORESIZE') if $self->{autodefer};

    my $olen = $self->FETCHSIZE;
    return if $len == $olen; # Woo-hoo!

    # file gets longer
    if ($len > $olen) {
        if ($self->_is_deferring) {
            $self->_store_deferred($_, $self->{recsep}) for $olen .. $len-1;
        } else {
            $self->_extend_file_to($len);
        }
        return;
    }

    # file gets shorter
    if ($self->_is_deferring) {
        # TODO maybe replace this with map-plus-assignment?
        my @doomed = grep { $_ >= $len } keys %{$self->{deferred}};
        $self->_delete_deferred($_) for @doomed;
        $self->{deferred_max} = $len-1;
    }

    $self->_seek($len);
    $self->_chop_file;
    $#{$self->{offsets}} = $len;
    # $self->{offsets}[0] = 0; # in case we just chopped this

    my @stale = grep { $_ >= $len } $self->{cache}->ckeys;
    $self->{cache}->remove(@stale);
}
### OPTIMIZE ME
### It should not be necessary to do FETCHSIZE
### Just seek to the end of the file.
# PUSH this, LIST
# Append LIST by splicing it in at the current end of the array.
sub PUSH {
    my $self = shift;
    my $end  = $self->FETCHSIZE;
    my $nrecs = scalar(@_);
    return $self->SPLICE($end, $nrecs, @_);
    # No need to return the new size:
    # $self->FETCHSIZE; # because av.c takes care of this for me
}
# POP this
# Remove and return the last record; returns nothing for an empty array.
sub POP {
    my ($self) = @_;

    my $last = $self->FETCHSIZE - 1;
    return if $last == -1;
    # print STDERR "# POPPITY POP POP POP\n";

    return scalar $self->SPLICE($last, 1);
}
# SHIFT this
# Remove and return the first record, via SPLICE in scalar context.
sub SHIFT {
    my ($self) = @_;
    return scalar $self->SPLICE(0, 1);
}
# UNSHIFT this, LIST
# Insert LIST in front of the first record, via SPLICE.
sub UNSHIFT {
    my ($self, @new_recs) = @_;
    return $self->SPLICE(0, 0, @new_recs);
    # $self->FETCHSIZE; # av.c takes care of this for me
}
# CLEAR this
# Empty the array: chop the file at its start, reset the cache to its full
# memory limit and empty it, and reset the offsets table and the
# deferred-write bookkeeping to their initial state.
sub CLEAR {
    my ($self) = @_;

    $self->_annotate_ad_history('CLEAR') if $self->{autodefer};

    # Truncate the file itself
    $self->_seekb(0);
    $self->_chop_file;

    # Forget everything held in memory
    my $cache = $self->{cache};
    $cache->set_limit($self->{memory});
    $cache->empty;

    @{$self->{offsets}} = (0);
    %{$self->{deferred}}= ();
    $self->{deferred_s} = 0;
    $self->{deferred_max} = -1;
}
# EXTEND this, count
# Hint that the array is about to grow to $count records.  Ignored while
# deferring; otherwise the offsets table is filled and the file extended.
sub EXTEND {
    my $self  = shift;
    my $count = shift;

    # No need to pre-extend anything in this case
    return if $self->_is_deferring;

    $self->_fill_offsets_to($count);
    $self->_extend_file_to($count);
}
# DELETE this, index
# Delete record $n and return its former value (as FETCH reports it).
# The last record is really removed from the file; an earlier record is
# overwritten with an empty one; an index past the end changes nothing.
sub DELETE {
    my ($self, $n) = @_;

    $self->_annotate_ad_history('DELETE') if $self->{autodefer};

    my $lastrec = $self->FETCHSIZE-1;
    my $rec     = $self->FETCH($n);

    if ($self->_is_deferring) {
        $self->_delete_deferred($n);
    }

    if ($n == $lastrec) {
        $self->_seek($n);
        $self->_chop_file;
        $#{$self->{offsets}}--;
        $self->{cache}->remove($n);
        # perhaps in this case I should also remove trailing null records?
        # 20020316
        # Note that delete @a[-3..-1] deletes the records in the wrong order,
        # so we only chop the very last one out of the file. We could repair this
        # by tracking deleted records inside the object.
    }
    elsif ($n < $lastrec) {
        $self->STORE($n, "");
    }

    return $rec;
}
# EXISTS this, index
# True if record $n is waiting in the deferred-write buffer or lies within
# the current size of the array.
sub EXISTS {
    my ($self, $n) = @_;

    if (exists $self->{deferred}{$n}) {
        return 1;
    }

    return $n < $self->FETCHSIZE;
}
# SPLICE this, offset, length, LIST
# Public splice.  Any deferred writes are flushed first, then the work is
# handed to _splice.  The removed records come back chomped (subject to
# autochomp): all of them in list context, a single one otherwise.
sub SPLICE {
    my $self = shift;

    $self->_annotate_ad_history('SPLICE') if $self->{autodefer};

    $self->_flush if $self->_is_deferring; # move this up?

    # Scalar or void context: one record
    unless (wantarray) {
        return $self->_chomp1(scalar $self->_splice(@_));
    }

    # List context: chomp the removed records in place, then return them
    my @removed = $self->_splice(@_);
    $self->_chomp(@removed);
    return @removed;
}
# Destructor: write out deferred records, break the cache's circular
# link, and close the filehandle if this object owns it.
sub DESTROY {
  my ($self) = @_;
  $self->flush if $self->_is_deferring;

  # break circular link
  if (defined $self->{cache}) {
    $self->{cache}->delink;
  }

  if ($self->{fh} and $self->{ourfh}) {
    delete $self->{ourfh};
    my $fh = delete $self->{fh};
    close $fh;
  }
}
# Replace $nrecs records starting at record $pos with @data, updating the
# file, the offsets table and the read cache.  Called by SPLICE and
# _old_flush.
#
# Arguments:
#   $pos   - first record affected; defaults to 0; a negative value counts
#            back from the end of the array
#   $nrecs - number of records to remove; defaults to the whole array size
#            (then clamped to "until the end"); a negative value leaves
#            that many records at the end untouched
#   @data  - replacement records; each is terminated by _fixrecs if needed
#
# Returns the removed records in list context; in scalar context the last
# removed record, or undef if none were removed.  Returns nothing at all
# when $pos is past the end and there is no data to insert.
#
# Croaks if a negative $pos reaches back past the start of the array.
sub _splice {
  my ($self, $pos, $nrecs, @data) = @_;
  my @result;
  $pos = 0 unless defined $pos;

  # Deal with negative and other out-of-range positions
  # Also set default for $nrecs
  {
    my $oldsize = $self->FETCHSIZE;
    $nrecs = $oldsize unless defined $nrecs;
    my $oldpos = $pos;
    if ($pos < 0) {
      $pos += $oldsize;
      if ($pos < 0) {
        # Report the subscript the caller actually passed in.
        croak "Modification of non-creatable array value attempted, subscript $oldpos";
      }
    }
    if ($pos > $oldsize) {
      return unless @data;
      $pos = $oldsize;          # This is what perl does for normal arrays
    }
    # The manual is very unclear here
    if ($nrecs < 0) {
      $nrecs = $oldsize - $pos + $nrecs;
      $nrecs = 0 if $nrecs < 0;
    }
    # nrecs is too big---it really means "until the end"
    # 20030507
    if ($nrecs + $pos > $oldsize) {
      $nrecs = $oldsize - $pos;
    }
  }

  $self->_fixrecs(@data);
  my $data = join '', @data;
  my $oldlen = 0;

  # compute length of data being removed
  for ($pos .. $pos+$nrecs-1) {
    last unless defined $self->_fill_offsets_to($_);
    my $rec = $self->_fetch($_);
    last unless defined $rec;
    push @result, $rec;
    # Why don't we just use length($rec) here?
    # Because that record might have come from the cache.  _splice
    # might have been called to flush out the deferred-write records,
    # and in this case length($rec) is the length of the record to be
    # *written*, not the length of the actual record in the file.  But
    # the offsets are still true. 20020322
    $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
      if defined $self->{offsets}[$_+1];
  }
  $self->_fill_offsets_to($pos+$nrecs);

  # Modify the file
  $self->_mtwrite($data, $self->{offsets}[$pos], $oldlen);
  # Adjust the offsets table
  $self->_oadjust([$pos, $nrecs, @data]);

  { # Take this read cache stuff out into a separate function
    # You made a half-attempt to put it into _oadjust.
    # Finish something like that up eventually.
    # STORE also needs to do something similarish

    # update the read cache, part 1
    # modified records
    for ($pos .. $pos+$nrecs-1) {
      my $new = $data[$_-$pos];
      if (defined $new) {
        $self->{cache}->update($_, $new);
      } else {
        $self->{cache}->remove($_);
      }
    }

    # update the read cache, part 2
    # moved records - records past the site of the change
    # need to be renumbered
    # Maybe merge this with the previous block?
    {
      my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys;
      my @newkeys = map $_-$nrecs+@data, @oldkeys;
      $self->{cache}->rekey(\@oldkeys, \@newkeys);
    }

    # Now there might be too much data in the cache, if we spliced out
    # some short records and spliced in some long ones.  If so, flush
    # the cache.
    $self->_cache_flush;
  }

  # Yes, the return value of 'splice' *is* actually this complicated
  wantarray ? @result : @result ? $result[-1] : undef;
}
# Write data into the file.
# $data is the data to be written.
# It should be written at byte position $pos, and should overwrite
# exactly $len of the following bytes.
# Note that if length($data) > $len, the subsequent bytes will have to
# be shifted toward the end of the file, and if length($data) < $len,
# they will have to be shifted toward the start (after which the file is
# truncated).
# Dies if $pos is undefined.
sub _twrite {
my ($self, $data, $pos, $len) = @_;
unless (defined $pos) {
die "\$pos was undefined in _twrite";
}
my $len_diff = length($data) - $len;
if ($len_diff == 0) { # Woo-hoo! Same size: overwrite in place.
my $fh = $self->{fh}; # NOTE(review): $fh is not used in this branch
$self->_seekb($pos);
$self->_write_record($data);
return; # well, that was easy.
}
# the two records are of different lengths
# our strategy here: rewrite the tail of the file,
# reading ahead one buffer at a time
# $bufsize is required to be at least as large as the data we're overwriting
my $bufsize = _bufsize($len_diff);
# $writepos trails $readpos: we read the old tail starting just past the
# replaced region and write starting at the replaced region itself.
my ($writepos, $readpos) = ($pos, $pos+$len);
my $next_block;
my $more_data;
# Seems like there ought to be a way to avoid the repeated code
# and the special case here. The read(1) is also a little weird.
# Think about this.
do {
$self->_seekb($readpos);
# Save the next block of old data before it gets overwritten ...
my $br = read $self->{fh}, $next_block, $bufsize;
# ... and probe one more byte to learn whether anything follows it.
$more_data = read $self->{fh}, my($dummy), 1;
$self->_seekb($writepos);
$self->_write_record($data);
$readpos += $br;
$writepos += length $data;
# What we just read becomes the pending data for the next pass.
$data = $next_block;
} while $more_data;
# Write out the final block that was read but not yet written.
$self->_seekb($writepos);
$self->_write_record($next_block);
# There might be leftover data at the end of the file
$self->_chop_file if $len_diff < 0;
}
# _iwrite(D, S, E)
# Insert text D at byte position S.
# Let C = E-S-|D|. If C < 0; die.
# Data in [S,S+C) is copied to [S+|D|,S+|D|+C) = [S+|D|,E).
# Data in [S+C = E-|D|, E) is returned. Data in [E, oo) is untouched.
#
# In a later version, don't read the entire intervening area into
# memory at once; do the copying block by block.
sub _iwrite {
my $self = shift;
my ($D, $s, $e) = @_;
my $d = length $D;
my $c = $e-$s-$d; # number of old bytes that still fit after the insertion
local *FH = $self->{fh};
confess "Not enough space to insert $d bytes between $s and $e"
if $c < 0;
# NOTE(review): whenever $e < $s, $c is already negative, so the check
# above fires first and this one cannot be reached.
confess "[$s,$e) is an invalid insertion range" if $e < $s;
$self->_seekb($s);
# Read the whole old contents of [S,E) into memory.
read FH, my $buf, $e-$s;
# Move the first $c old bytes out of $buf and onto the end of D;
# what remains in $buf is the part that no longer fits.
$D .= substr($buf, 0, $c, "");
$self->_seekb($s);
$self->_write_record($D);
return $buf;
}
# Like _twrite, but the data-pos-len triple may be repeated; you may
# write several chunks. All the writing will be done in
# one pass. Chunks SHALL be in ascending order and SHALL NOT overlap.
#
# Arguments: a flat list of ($data, $pos, $len) triples, where $pos and
# $len describe the OLD byte range to be replaced by $data.
# Dies if the argument count is not a multiple of three.
sub _mtwrite {
my $self = shift;
my $unwritten = ""; # data displaced by an earlier chunk, not yet written
my $delta = 0; # running shift of file contents; updated per chunk below
@_ % 3 == 0
or die "Arguments to _mtwrite did not come in groups of three";
while (@_) {
my ($data, $pos, $len) = splice @_, 0, 3;
my $end = $pos + $len; # The OLD end of the segment to be replaced
# Carry forward whatever the previous chunk could not write.
$data = $unwritten . $data;
$delta -= length($unwritten);
$unwritten = "";
$pos += $delta; # This is where the data goes now
my $dlen = length $data;
$self->_seekb($pos);
if ($len >= $dlen) { # the data will fit
$self->_write_record($data);
$delta += ($dlen - $len); # everything following moves down by this much
$data = ""; # All the data in the buffer has been written
} else { # won't fit
# Write the leading part now; the rest stays in $data.
my $writable = substr($data, 0, $len - $delta, "");
$self->_write_record($writable);
$delta += ($dlen - $len); # everything following moves down by this much
}
# At this point we've written some but maybe not all of the data.
# There might be a gap to close up, or $data might still contain a
# bunch of unwritten data that didn't fit.
my $ndlen = length $data; # NOTE(review): $ndlen is not used below
if ($delta == 0) {
$self->_write_record($data);
} elsif ($delta < 0) {
# upcopy (close up gap)
if (@_) {
# $_[1] is the $pos of the next triple: copy only up to there.
$self->_upcopy($end, $end + $delta, $_[1] - $end);
} else {
# Last chunk: copy through to the end of the file.
$self->_upcopy($end, $end + $delta);
}
} else {
# downcopy (insert data that didn't fit; replace this data in memory
# with _later_ data that doesn't fit)
if (@_) {
# $_[1] is the $pos of the next triple: work only up to there.
$unwritten = $self->_downcopy($data, $end, $_[1] - $end);
} else {
# Make the file longer to accommodate the last segment that doesn't fit
$unwritten = $self->_downcopy($data, $end);
}
}
}
}
# Copy block of data of length $len from position $spos to position $dpos
# $dpos must be <= $spos
#
# If $len is undefined, go all the way to the end of the file
# and then truncate it ($spos - $dpos bytes will be removed)
#
# Dies if $dpos > $spos; does nothing if the two positions are equal.
# Copies in chunks of at most $blocksize bytes.
sub _upcopy {
my $blocksize = 8192;
my ($self, $spos, $dpos, $len) = @_;
if ($dpos > $spos) {
die "source ($spos) was upstream of destination ($dpos) in _upcopy";
} elsif ($dpos == $spos) {
return;
}

while (! defined ($len) || $len > 0) {
# Read a full block, or just the remainder when $len is known.
my $readsize = ! defined($len) ? $blocksize
: $len > $blocksize ? $blocksize
: $len;

my $fh = $self->{fh};
$self->_seekb($spos);
my $bytes_read = read $fh, my($data), $readsize;
$self->_seekb($dpos);
# Nothing left to read: truncate at the destination position and stop.
if ($data eq "") {
$self->_chop_file;
last;
}
$self->_write_record($data);
$spos += $bytes_read;
$dpos += $bytes_read;
$len -= $bytes_read if defined $len;
}
}
# Write $data into a block of length $len at position $pos,
# moving everything in the block forwards to make room.
# Instead of writing the last length($data) bytes from the block
# (because there isn't room for them any longer) return them.
#
# Undefined $len means 'until the end of the file'
#
# Works in chunks of at most $blocksize bytes; $data always holds the
# bytes that have been displaced but not yet written back.
sub _downcopy {
my $blocksize = 8192;
my ($self, $data, $pos, $len) = @_;
my $fh = $self->{fh};
while (! defined $len || $len > 0) {
# Read a full block, or just the remainder when $len is known.
my $readsize = ! defined($len) ? $blocksize
: $len > $blocksize? $blocksize : $len;
$self->_seekb($pos);
# Pick up the old contents before they are overwritten below.
read $fh, my($old), $readsize;
my $last_read_was_short = length($old) < $readsize;
$data .= $old;
my $writable;
if ($last_read_was_short) {
# If last read was short, then $data now contains the entire rest
# of the file, so there's no need to write only one block of it
$writable = $data;
$data = "";
} else {
# Write back exactly as many bytes as were just read; keep the rest.
$writable = substr($data, 0, $readsize, "");
}
last if $writable eq "";
$self->_seekb($pos);
$self->_write_record($writable);
last if $last_read_was_short && $data eq "";
$len -= $readsize if defined $len;
$pos += $readsize;
}
return $data;
}
# Adjust the offsets table following an '_mtwrite'.
#
# Arguments are array references of the form
#   [$pos, $nrecs, @data]
# each indicating that $nrecs records starting at record number $pos were
# removed and replaced with the records in @data (whose lengths determine
# the new offsets).  The caller guarantees that $pos is strictly
# increasing from one item to the next.
#
# No meaningful return value.  Finishes by calling _cache_flush, because
# the cache may now hold too much data.
sub _oadjust {
  my $self = shift;
  my $delta = 0;        # net change in byte offsets so far
  my $delta_recs = 0;   # net change in record numbers so far
  my $prev_end = -1;    # last record touched by the previous batch

  for (@_) {
    my ($pos, $nrecs, @data) = @$_;
    $pos += $delta_recs;

    # Adjust the offsets of the records after the previous batch up
    # to the first new one of this batch
    for my $i ($prev_end+2 .. $pos - 1) {
      $self->{offsets}[$i] += $delta;
    }
    $prev_end = $pos + @data - 1;   # last record moved on this pass

    # Remove the offsets for the removed records;
    # replace with the offsets for the inserted records
    my @newoff = ($self->{offsets}[$pos] + $delta);
    for my $i (0 .. $#data) {
      my $newlen = length $data[$i];
      push @newoff, $newoff[$i] + $newlen;
      $delta += $newlen;
    }
    for my $i ($pos .. $pos+$nrecs-1) {
      last if $i+1 > $#{$self->{offsets}};
      my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i];
      $delta -= $oldlen;
    }

    # (Read-cache updating is not done here; _splice does it itself.)

    # replace old offsets with new
    splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff;
    # What if we just spliced out the end of the offsets table?
    # shouldn't we clear $self->{eof}?  Test for this XXX BUG TODO
    $delta_recs += @data - $nrecs;  # net change in total number of records
  }

  # The trailing records at the very end of the file
  if ($delta) {
    for my $i ($prev_end+2 .. $#{$self->{offsets}}) {
      $self->{offsets}[$i] += $delta;
    }
  }

  # If we scrubbed out all known offsets, regenerate the trivial table
  # that knows that the file does indeed start at 0.
  $self->{offsets}[0] = 0 unless @{$self->{offsets}};
  # If the file got longer, the offsets table is no longer complete
  # $self->{eof} = 0 if $delta_recs > 0;

  # Now there might be too much data in the cache, if we spliced out
  # some short records and spliced in some long ones.  If so, flush
  # the cache.
  $self->_cache_flush;
}
# Make sure every record passed in ends with the record separator.
# The arguments are modified in place; undefined records become a bare
# separator.
sub _fixrecs {
  my $self = shift;
  foreach my $rec (@_) {
    $rec = "" unless defined $rec;
    next if substr($rec, - $self->{recseplen}) eq $self->{recsep};
    $rec .= $self->{recsep};
  }
}
################################################################
#
# Basic read, write, and seek
#
# Position the filehandle at the start of record number $n.
# The offsets table must already contain an entry for $n; otherwise
# this confesses with a logic error.
#
# $n = -1 is special: it selects the last entry of the offsets table,
# i.e. the start of the last *known* record.  Whether that is really the
# last record in the file depends on whether the offsets table is fully
# populated.
#
sub _seek {
  my ($self, $n) = @_;
  my $offset = $self->{offsets}[$n];
  unless (defined $offset) {
    confess("logic error: undefined offset for record $n");
  }
  seek($self->{fh}, $offset, SEEK_SET)
    or confess "Couldn't seek filehandle: $!";  # "Should never happen."
}
# Position the filehandle at absolute byte offset $b; dies on failure.
sub _seekb {
  my ($self, $b) = @_;
  my $ok = seek $self->{fh}, $b, SEEK_SET;
  $ok or die "Couldn't seek filehandle: $!";  # "Should never happen."
}
# Populate the offsets table up to the beginning of record $n and
# return the offset of record $n.
# Returns nothing (and marks the table complete via $self->{eof}) if the
# file ends before record $n.
sub _fill_offsets_to {
my ($self, $n) = @_;
# Table already complete: answer straight from it (undef if no such record).
return $self->{offsets}[$n] if $self->{eof};
my $fh = $self->{fh};
# Alias @OFF to the object's offsets array for the duration of this call.
local *OFF = $self->{offsets};
my $rec;
until ($#OFF >= $n) {
$self->_seek(-1); # tricky -- see comment at _seek
$rec = $self->_read_record;
if (defined $rec) {
# The position after the record just read is the next record's offset.
push @OFF, int(tell $fh); # Tels says that int() saves memory here
} else {
$self->{eof} = 1;
return; # It turns out there is no such record
}
}
# we have now read all the records up to record n-1,
# so we can return the offset of record n
$OFF[$n];
}
# Read from the last known record to the end of the file, recording the
# offset that follows each record, then mark the offsets table complete.
# Returns the last index of the offsets table.
sub _fill_offsets {
my ($self) = @_;
my $fh = $self->{fh};
# Alias @OFF to the object's offsets array for the duration of this call.
local *OFF = $self->{offsets};

$self->_seek(-1); # tricky -- see comment at _seek
# Tels says that inlining read_record() would make this loop
# five times faster. 20030508
while ( defined $self->_read_record()) {
# int() saves us memory here
push @OFF, int(tell $fh);
}
$self->{eof} = 1;
$#OFF;
}
# Print $rec to the file at the current position; dies if print fails.
# The record must already carry its terminator: the output record
# separator is forced to empty so nothing is appended.
sub _write_record {
  my ($self, $rec) = @_;
  local $\ = "";
  print { $self->{fh} } $rec
    or die "Couldn't write record: $!";  # "Should never happen."
}
# Read the next record from the current file position, using the
# object's record separator.  Returns nothing at end of file.
# The returned record always ends with the separator: an unterminated
# final record gets one appended (and, unless the object is read-only,
# the separator is also printed to the file).
sub _read_record {
my $self = shift;
my $rec;
# Read one "line" with $/ temporarily set to the record separator.
{ local $/ = $self->{recsep};
my $fh = $self->{fh};
$rec = <$fh>;
}
return unless defined $rec;
if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) {
# improperly terminated final record --- quietly fix it.
# my $ac = substr($rec, -$self->{recseplen});
# $ac =~ s/\n/\\n/g;
$self->{sawlastrec} = 1;
unless ($self->{rdonly}) {
# Force an empty output record separator so only recsep is written.
local $\ = "";
my $fh = $self->{fh};
print $fh $self->{recsep};
}
$rec .= $self->{recsep};
}
# $self->{_read} += length($rec) if defined $rec;
$rec;
}
# Return the object's '_read' and '_written' fields as a two-element list.
sub _rw_stats {
  my ($self) = @_;
  return @{$self}{qw(_read _written)};
}
################################################################
#
# Read cache management
# Shrink the read cache so that it fits in the memory budget that is
# left over after the deferred-write buffer has taken its share.
sub _cache_flush {
  my $self = shift;
  my $budget = $self->{memory} - $self->{deferred_s};
  $self->{cache}->reduce_size_to($budget);
}
# True when cached bytes plus deferred bytes reach the memory limit.
sub _cache_too_full {
  my ($self) = @_;
  my $in_use = $self->{cache}->bytes + $self->{deferred_s};
  return $in_use >= $self->{memory};
}
################################################################
#
# File custodial services
#
# The whole file has been read and the offsets table is fully populated;
# now a record beyond the end of the file is about to be written.
# Prepare for that by appending empty records until record $n exists.
#
# Assumes the offsets table already contains the offset of record $n if
# that record exists, and otherwise extends to the end of the file.
sub _extend_file_to {
  my ($self, $n) = @_;
  $self->_seek(-1);     # position after the end of the last record

  # the offsets table has one entry more than the total number of records
  my $missing = $n - $#{$self->{offsets}};

  # Todo : just use $self->{recsep} x $missing here?
  for (; $missing > 0; $missing--) {
    $self->_write_record($self->{recsep});
    push @{$self->{offsets}}, int(tell $self->{fh});
  }
}
# Cut the file off at the filehandle's current position.
sub _chop_file {
  my ($self) = @_;
  my $fh = $self->{fh};
  truncate $fh, tell($fh);
}
# Compute a buffer size suitable for moving all the data in a file
# forward by $n bytes ($n may be negative).
# The result is $n rounded up to a multiple of 8192, and never less
# than 8192, so it is always at least $n.
sub _bufsize {
  my ($n) = @_;
  if ($n <= 0) {
    return 8192;
  }
  my $whole_blocks = $n & ~8191;
  return ($n & 8191) ? $whole_blocks + 8192 : $whole_blocks;
}
################################################################
#
# Miscellaneous public methods
#
# Lock the underlying file.
# $op is an flock operation and defaults to LOCK_EX.
# Returns the result of the builtin flock.  Croaks with a usage message
# when called with more than three arguments in total.
sub flock {
  my ($self, $op) = @_;
  if (@_ > 3) {
    my $pack = ref $self;
    croak "Usage: $pack\->flock([OPERATION])";
  }
  $op = LOCK_EX unless defined $op;
  my $locked = CORE::flock($self->{fh}, $op);

  # If you're locking the file, then presumably it's because
  # there might have been a write access by another process.
  # In that case, the read cache contents and the offsets table
  # might be invalid, so discard them.  20030508
  if ($locked && ($op & (LOCK_EX | LOCK_SH))) {
    $self->{offsets} = [0];
    $self->{cache}->empty;
  }
  $locked;
}
# Get or set the autochomp option.
# With an argument, stores it and returns the previous setting;
# without one, returns the current setting.
sub autochomp {
  my $self = shift;
  return $self->{autochomp} unless @_;
  my $previous = $self->{autochomp};
  $self->{autochomp} = shift;
  return $previous;
}
# Return the byte offset of record $n, filling in the offsets table on
# demand.  Returns nothing if there is no such record.
sub offset {
  my ($self, $n) = @_;
  if ($#{$self->{offsets}} < $n) {
    # request for record beyond the end of file
    return if $self->{eof};
    # If it's still undefined, there is no such record
    my $found = $self->_fill_offsets_to($n);
    return unless defined $found;
  }
  return $self->{offsets}[$n];
}
# Forget every known offset except that of record 0.
sub discard_offsets {
  my ($self) = @_;
  $self->{offsets} = [0];
}
################################################################
#
# Matters related to deferred writing
#
# Turn on explicit deferred writing.
# Autodeferring is stopped first and the autodefer history is erased.
sub defer {
  my ($self) = @_;
  $self->_stop_autodeferring;
  @{$self->{ad_history}} = ();
  $self->{defer} = 1;
}
# Write out all deferred records and leave explicit deferred mode.
#
# This could be better optimized to write the file in one pass, instead
# of one pass per block of records.  But that will require modifications
# to _twrite, so I should have a good _twrite test suite first.
sub flush {
  my ($self) = @_;
  $self->_flush;
  $self->{defer} = 0;
}
# Older flush implementation: writes the deferred records to the file
# one run of consecutive record numbers at a time, using _splice for
# each run, then empties the deferred-write buffer.
sub _old_flush {
my $self = shift;
# Deferred record numbers in ascending numeric order.
my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
while (@writable) {
# gather all consecutive records from the front of @writable
my $first_rec = shift @writable;
my $last_rec = $first_rec+1;
++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
# The loop leaves $last_rec one past the run; step back onto its last record.
--$last_rec;
$self->_fill_offsets_to($last_rec);
$self->_extend_file_to($last_rec);
# Replace the whole run in the file with the deferred versions.
$self->_splice($first_rec, $last_rec-$first_rec+1,
@{$self->{deferred}}{$first_rec .. $last_rec});
}
$self->_discard; # clear out deferred-write-cache
}
# Write all deferred records to the file in a single _mtwrite call,
# empty the deferred-write buffer, and then fix up the offsets table
# with _oadjust.  Does not change the deferring mode.
sub _flush {
my $self = shift;
# Deferred record numbers in ascending numeric order.
my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
my @args; # (data, position, length) triples for _mtwrite
my @adjust; # [pos, nrecs, records...] items for _oadjust
while (@writable) {
# gather all consecutive records from the front of @writable
my $first_rec = shift @writable;
my $last_rec = $first_rec+1;
++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
# The loop leaves $last_rec one past the run; step back onto its last record.
--$last_rec;
# $end is the byte offset just past the run, if the file reaches that far.
my $end = $self->_fill_offsets_to($last_rec+1);
if (not defined $end) {
# The file is too short: pad it out, then use the offset of $last_rec.
$self->_extend_file_to($last_rec);
$end = $self->{offsets}[$last_rec];
}
my ($start) = $self->{offsets}[$first_rec];
push @args,
join("", @{$self->{deferred}}{$first_rec .. $last_rec}), # data
$start, # position
$end-$start; # length
push @adjust, [$first_rec, # starting at this position...
$last_rec-$first_rec+1, # this many records...
# are replaced with these...
@{$self->{deferred}}{$first_rec .. $last_rec},
];
}
$self->_mtwrite(@args); # write multiple record groups
$self->_discard; # clear out deferred-write-cache
$self->_oadjust(@adjust);
}
# Throw away all deferred writes and leave explicit deferred mode.
sub discard {
  my ($self) = @_;
  $self->_discard;
  $self->{defer} = 0;
}
# Throw away all deferred writes without touching the deferring mode,
# and give the read cache its full memory allowance back.
sub _discard {
  my ($self) = @_;
  %{$self->{deferred}} = ();
  @{$self}{qw(deferred_s deferred_max)} = (0, -1);
  $self->{cache}->set_limit($self->{memory});
}
# True when deferred writing is in effect, whether it was requested
# explicitly ($self->{defer}) or switched on automatically
# ($self->{autodeferring}).
sub _is_deferring {
  my ($self) = @_;
  return $self->{defer} || $self->{autodeferring};
}
# The largest record number of any deferred record, or -1 if there are
# none.  A defined $self->{deferred_max} is returned as is; otherwise the
# value is computed from the deferred buffer and remembered there.
sub _defer_max {
  my ($self) = @_;
  my $known = $self->{deferred_max};
  return $known if defined $known;

  my $max = -1;
  foreach my $recno (keys %{$self->{deferred}}) {
    $max = $recno if $recno > $max;
  }
  $self->{deferred_max} = $max;
  return $max;
}
################################################################
#
# Matters related to autodeferment
#
# Get or set the autodefer option.
# With an argument, stores it and returns the previous setting; if the
# feature was previously on, autodeferring is stopped and the history is
# erased.  Without an argument, returns the current setting.
sub autodefer {
  my $self = shift;
  return $self->{autodefer} unless @_;

  my $previous = $self->{autodefer};
  $self->{autodefer} = shift;
  if ($previous) {
    $self->_stop_autodeferring;
    @{$self->{ad_history}} = ();
  }
  return $previous;
}
# The user is trying to store record #$n.  Record that in the history,
# and then enable (or disable) autodeferment if that seems useful.
# Note that it's OK for $n to be a non-number, as long as the function
# is prepared to deal with that. Nobody else looks at the ad_history.
#
# Now, what does the ad_history mean, and what is this function doing?
# Essentially, the idea is to enable autodeferring when we see that the
# user has made three consecutive STORE calls to three consecutive records.
# ("Three" is actually ->{autodefer_threshhold}.)
# A STORE call for record #$n inserts $n into the autodefer history,
# and if the history contains three consecutive records, we enable
# autodeferment. An ad_history of [X, Y] means that the most recent
# STOREs were for records X, X+1, ..., Y, in that order.
#
# Inserting a nonconsecutive number erases the history and starts over.
#
# Performing a special operation like SPLICE erases the history.
#
# There's one special case: CLEAR means that CLEAR was just called.
# In this case, we prime the history with [-2, -1] so that if the next
# write is for record 0, autodeferring goes on immediately. This is for
# the common special case of "@a = (...)".
#
sub _annotate_ad_history {
my ($self, $n) = @_;
return unless $self->{autodefer}; # feature is disabled
return if $self->{defer}; # already in explicit defer mode
# Do nothing unless the last known offset has reached the file-length threshold.
return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
# Alias @H to the object's history array for the duration of this call.
local *H = $self->{ad_history};
if ($n eq 'CLEAR') {
@H = (-2, -1); # prime the history with fake records
$self->_stop_autodeferring;
} elsif ($n =~ /^\d+$/) {
if (@H == 0) {
@H = ($n, $n);
} else { # @H == 2
if ($H[1] == $n-1) { # another consecutive record
$H[1]++;
# Enough consecutive records in a row: switch autodeferring on.
if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
$self->{autodeferring} = 1;
}
} else { # nonconsecutive- erase and start over
@H = ($n, $n);
$self->_stop_autodeferring;
}
}
} else { # SPLICE or STORESIZE or some such
@H = ();
$self->_stop_autodeferring;
}
}
# Switch autodeferring off, writing out any deferred records first if
# it was on.
sub _stop_autodeferring {
  my ($self) = @_;
  $self->_flush if $self->{autodeferring};
  $self->{autodeferring} = 0;
}
################################################################
# This is NOT a method.  It exists for two reasons:
# 1. To factor a fairly complicated block out of the constructor
# 2. To provide access for the test suite, which needs to be sure
#    files are being written properly.
#
# Returns the default record separator: the current value of $/, with
# every \n turned into \r\n on MSWin32 (Dos too?), because Windows users
# expect files to be terminated with \r\n although $/ is set to \n.
# Note that this also transforms \n\n into \r\n\r\n.  That is a feature.
sub _default_recsep {
  my $sep = $/;
  $sep =~ s/\n/\r\n/g if $^O eq 'MSWin32';
  return $sep;
}
# Utility function for _check_integrity: print a diagnostic line
# prefixed with "# ", with newlines and carriage returns in the message
# shown as \n and \r.
sub _ci_warn {
  my ($text) = @_;
  $text =~ s/\n/\\n/g;
  $text =~ s/\r/\\r/g;
  print "# $text\n";
}
# Make sure the cache is consistent with the file contents and the
# internal data structures are consistent with each other.  Each problem
# found is reported through _ci_warn.  Returns true if everything checks
# out, false if not.
#
# The $file argument is no longer used.  It is retained for compatibility
# with the existing test suite.  $warn is accepted but not used here.
sub _check_integrity {
  my ($self, $file, $warn) = @_;
  my $rsl = $self->{recseplen};
  my $rs = $self->{recsep};
  my $good = 1;
  local *_;             # local $_ does not work here
  local $DIAGNOSTIC = 1;

  # The record separator must be a nonempty string of the recorded length
  if (not defined $rs) {
    _ci_warn("recsep is undef!");
    $good = 0;
  } elsif ($rs eq "") {
    _ci_warn("recsep is empty!");
    $good = 0;
  } elsif ($rsl != length $rs) {
    my $ln = length $rs;
    _ci_warn("recsep <$rs> has length $ln, should be $rsl");
    $good = 0;
  }

  # The offsets table must begin with offset 0
  if (not defined $self->{offsets}[0]) {
    _ci_warn("offset 0 is missing!");
    $good = 0;
  } elsif ($self->{offsets}[0] != 0) {
    _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
    $good = 0;
  }

  my $cached = 0;       # total bytes found in the read cache
  {
    # Re-read the file from the start and compare each record with the
    # offsets table and with the cached copy, if there is one.
    local *F = $self->{fh};
    seek F, 0, SEEK_SET;
    local $. = 0;
    local $/ = $rs;
    while (<F>) {
      my $n = $. - 1;
      my $cached = $self->{cache}->_produce($n);  # shadows the byte total
      my $offset = $self->{offsets}[$.];
      my $ao = tell F;
      if (defined $offset && $offset != $ao) {
        _ci_warn("rec $n: offset <$offset> actual <$ao>");
        $good = 0;
      }
      if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
        $good = 0;
        _ci_warn("rec $n: cached <$cached> actual <$_>");
      }
      if (defined $cached && substr($cached, -$rsl) ne $rs) {
        $good = 0;
        _ci_warn("rec $n in the cache is missing the record separator");
      }
      if (! defined $offset && $self->{eof}) {
        $good = 0;
        _ci_warn("The offset table was marked complete, but it is missing element $.");
      }
    }
    if (@{$self->{offsets}} > $.+1) {
      $good = 0;
      my $n = @{$self->{offsets}};
      _ci_warn("The offset table has $n items, but the file has only $.");
    }

    my $deferring = $self->_is_deferring;
    for my $n ($self->{cache}->ckeys) {
      my $r = $self->{cache}->_produce($n);
      $cached += length($r);
      next if $n+1 <= $.;         # checked this already
      _ci_warn("spurious caching of record $n");
      $good = 0;
    }
    my $b = $self->{cache}->bytes;
    if ($cached != $b) {
      _ci_warn("cache size is $b, should be $cached");
      $good = 0;
    }
  }

  # That cache has its own set of tests
  $good = 0 unless $self->{cache}->_check_integrity;

  # Now let's check the deferbuffer
  # Unless deferred writing is enabled, it should be empty
  if (! $self->_is_deferring && %{$self->{deferred}}) {
    _ci_warn("deferred writing disabled, but deferbuffer nonempty");
    $good = 0;
  }

  # Any record in the deferbuffer should *not* be present in the readcache
  my $deferred_s = 0;
  while (my ($n, $r) = each %{$self->{deferred}}) {
    $deferred_s += length($r);
    if (defined $self->{cache}->_produce($n)) {
      _ci_warn("record $n is in the deferbuffer *and* the readcache");
      $good = 0;
    }
    if (substr($r, -$rsl) ne $rs) {
      _ci_warn("rec $n in the deferbuffer is missing the record separator");
      $good = 0;
    }
  }

  # Total size of deferbuffer should match internal total
  if ($deferred_s != $self->{deferred_s}) {
    _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
    $good = 0;
  }

  # Total size of deferbuffer should not exceed the specified limit
  if ($deferred_s > $self->{dw_size}) {
    _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}");
    $good = 0;
  }

  # Total size of cached data should not exceed the specified limit
  if ($deferred_s + $cached > $self->{memory}) {
    my $total = $deferred_s + $cached;
    _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}");
    $good = 0;
  }

  # Stuff related to autodeferment
  if (!$self->{autodefer} && @{$self->{ad_history}}) {
    _ci_warn("autodefer is disabled, but ad_history is nonempty");
    $good = 0;
  }
  if ($self->{autodeferring} && $self->{defer}) {
    _ci_warn("both autodeferring and explicit deferring are active");
    $good = 0;
  }
  if (@{$self->{ad_history}} == 0) {
    # That's OK, no additional tests required
  } elsif (@{$self->{ad_history}} == 2) {
    my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
    if (@non_number) {
      my $msg;
      { local $" = ')(';
        $msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
      }
      _ci_warn($msg);
      $good = 0;
    } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
      _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
      $good = 0;
    }
  } else {
    _ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
    $good = 0;
  }

  $good;
}
################################################################
#
# Tie::File::Cache
#
# Read cache
package Tie::File::Cache;
# The cache shares its version number with Tie::File.
$Tie::File::Cache::VERSION = $Tie::File::VERSION;
use Carp ':DEFAULT', 'confess';
# A cache object is a blessed array; these constants name its slots.
sub HEAP () { 0 } # the Tie::File::Heap object holding the cached data
sub HASH () { 1 } # hash mapping each cached key to its index in the heap
sub MAX () { 2 } # limit on the total number of cached bytes
sub BYTES() { 3 } # number of bytes currently cached
#sub STAT () { 4 } # Array with request statistics for each record
#sub MISS () { 5 } # Total number of cache misses
#sub REQ () { 6 } # Total number of cache requests
use strict 'vars';
# Constructor: build an empty cache limited to $max bytes.
# Croaks if $max is not supplied.  The heap is handed a reference to the
# cache itself (see delink, which drops the heap again).
sub new {
  my ($pack, $max) = @_;
  local *_;
  croak "missing argument to ->new" unless defined $max;
  my $self = bless [], $pack;
  # Slots in order: HEAP, HASH, MAX, BYTES
  @$self = (Tie::File::Heap->new($self), {}, $max, 0);
  return $self;
}
# Change the cache's byte limit by $n (which may be negative).
sub adj_limit {
  my $self = shift;
  my ($n) = @_;
  return $self->[MAX] += $n;
}
# Set the cache's byte limit to $n.
sub set_limit {
  my $self = shift;
  my ($n) = @_;
  return $self->[MAX] = $n;
}
# For internal use only.
# Callback for the heap structure: it reports that the item with hash
# key $k now lives at heap index $n.  An undefined $n means the item has
# left the heap, so its hash entry is dropped.
sub _heap_move {
  my ($self, $k, $n) = @_;
  my $index_of = $self->[HASH];
  return delete $index_of->{$k} unless defined $n;
  return $index_of->{$k} = $n;
}
# Store $val in the cache under $key, replacing any existing entry.
# A value longer than the cache limit is silently not cached.
# Croaks on a missing key; confesses on an undefined limit or value.
sub insert {
  my ($self, $key, $val) = @_;
  local *_;
  croak "missing argument to ->insert" unless defined $key;
  confess "undefined max" unless defined $self->[MAX];
  confess "undefined val" unless defined $val;
  return if length($val) > $self->[MAX];

  my $node = $self->[HASH]{$key};
  if (defined $node) {
    # Already cached: swap the value and stop counting the old one.
    my $previous = $self->[HEAP]->set_val($node, $val);
    $self->[BYTES] -= length($previous);
  } else {
    $self->[HEAP]->insert($key, $val);
  }
  $self->[BYTES] += length($val);
  $self->flush if $self->[BYTES] > $self->[MAX];
}
# Pop one item off the heap and return its data, adjusting the byte
# count.  Returns nothing if the heap had nothing to give.
sub expire {
  my ($self) = @_;
  my $victim = $self->[HEAP]->popheap;
  return unless defined $victim;
  $self->[BYTES] -= length $victim;
  return $victim;
}
# Remove the given keys from the cache, adjusting the byte count.
# Keys that are not cached are skipped.  Returns the data that was
# removed, in the order the keys were given.
sub remove {
  my ($self, @keys) = @_;
  my @removed;
  foreach my $key (@keys) {
    exists $self->[HASH]{$key} or next;
    my $data = $self->[HEAP]->remove($self->[HASH]{$key});
    $self->[BYTES] -= length $data;
    push @removed, $data;
  }
  return @removed;
}
# Look $key up through the heap's lookup method.
# Returns nothing if the key is not cached; croaks on a missing key.
sub lookup {
  my ($self, $key) = @_;
  local *_;
  croak "missing argument to ->lookup" unless defined $key;
  return unless exists $self->[HASH]{$key};
  return $self->[HEAP]->lookup($self->[HASH]{$key});
}
# For internal use only.
# Read the data stored for $key straight out of the heap array,
# without going through the heap's lookup method.
# Returns nothing if the key is not cached.
sub _produce {
  my ($self, $key) = @_;
  my $loc = $self->[HASH]{$key};
  return defined $loc ? $self->[HEAP][$loc][2] : ();
}
# For internal use only.
# Ask the heap to promote the entry stored for $key.
sub _promote {
  my ($self, $key) = @_;
  my $loc = $self->[HASH]{$key};
  $self->[HEAP]->promote($loc);
}
# Discard everything in the cache: the key index, the byte count and
# the heap contents.
sub empty {
  my $self = shift;
  %{$self->[HASH]} = ();
  $self->[BYTES] = 0;
  return $self->[HEAP]->empty;
}
# True if no keys are cached.
sub is_empty {
  my $self = shift;
  my $count = keys %{$self->[HASH]};
  return $count == 0;
}
# Make the cache hold $val for $key.
#   - A value longer than the cache limit cannot be cached, so any
#     existing entry for $key is removed instead.
#   - An existing entry has its value replaced.
#   - Otherwise a new entry is inserted.
# Finishes by flushing the cache in case it is now over its limit.
# Croaks on a missing key.
sub update {
  my ($self, $key, $val) = @_;
  local *_;
  croak "missing argument to ->update" unless defined $key;
  if (length($val) > $self->[MAX]) {
    # remove() already deducts the evicted data from BYTES; deducting it
    # again here would leave the byte count too low.
    $self->remove($key);
  } elsif (exists $self->[HASH]{$key}) {
    my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
    $self->[BYTES] += length($val);
    $self->[BYTES] -= length($oldval) if defined $oldval;
  } else {
    $self->[HEAP]->insert($key, $val);
    $self->[BYTES] += length($val);
  }
  $self->flush;
}
# Renumber cached entries: the entry stored under $okeys->[$i] becomes
# the entry stored under $nkeys->[$i].
#
# $okeys and $nkeys are references to arrays of equal length.
# Croaks if $nkeys is missing or the lengths differ.
sub rekey {
  my ($self, $okeys, $nkeys) = @_;
  local *_;
  # Validate before touching either list.
  croak "missing argument to ->rekey" unless defined $nkeys;
  croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;

  # Two passes are needed: every old key is detached first, so that a
  # new key which equals some other old key cannot clobber an entry that
  # has not been moved yet.
  my %adjusted;         # map new keys to heap indices
  for my $i (0 .. $#$okeys) {
    $adjusted{$nkeys->[$i]} = delete $self->[HASH]{$okeys->[$i]};
  }
  while (my ($nk, $ix) = each %adjusted) {
    $self->[HEAP]->rekey($ix, $nk);
    $self->[HASH]{$nk} = $ix;
  }
}
# Return the keys currently cached (their number, in scalar context).
sub ckeys {
  my ($self) = @_;
  return keys %{$self->[HASH]};
}
# Return total amount of cached data
# Accessor for the running BYTES total.
sub bytes {
    my ($self) = @_;
    return $self->[BYTES];
}
# Expire oldest item from cache until cache size is smaller than $max
# Pop items off the top of the heap, subtracting each one's length from
# BYTES, until BYTES is no larger than $max.  Stops early (returning
# nothing) as soon as the heap has nothing more to pop.
sub reduce_size_to {
    my $self = shift;
    my $max  = shift;
    until ($self->[BYTES] <= $max) {
        # Note that Tie::File::Cache::expire has been inlined here
        my $evicted = $self->[HEAP]->popheap;
        defined $evicted or return;
        $self->[BYTES] -= length $evicted;
    }
}
# Why not just $self->reduce_size_to($self->[MAX])?
# Try this when things stabilize TODO XXX
# If the cache is too full, expire the oldest records
# Shrink the cache back to MAX bytes, but only when it currently holds
# more than that.
sub flush {
    my ($self) = @_;
    if ($self->[BYTES] > $self->[MAX]) {
        $self->reduce_size_to($self->[MAX]);
    }
}
# For internal use only
# Delegate to the heap's expire_order and return its result unchanged.
sub _produce_lru {
    my ($self) = @_;
    return $self->[HEAP]->expire_order;
}
# At compile time, alias Tie::File's _ci_warn into the current package so
# that the integrity checker below can call it as a local function.
BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
# Consistency check for the cache.  Reports every problem found through
# _ci_warn and returns 1 when nothing was wrong, 0 otherwise.  Checks, in
# order: the heap's own integrity; each hash key and the heap node it
# points at; the BYTES total; and the MAX limit.
sub _check_integrity {          # For CACHE
    my ($self) = @_;
    my $good = 1;

    # Test HEAP
    $good = 0 unless $self->[HEAP]->_check_integrity;

    # Test HASH
    my $bytes = 0;
    for my $key (keys %{ $self->[HASH] }) {
        if (!($key eq '0' || $key =~ /^[1-9][0-9]*$/)) {
            $good = 0;
            _ci_warn("Cache hash key <$key> is non-numeric");
        }

        my $slot = $self->[HASH]{$key};
        if (!defined $slot) {
            $good = 0;
            _ci_warn("Heap index number for key $key is undefined");
            next;
        }
        if ($slot == 0) {
            $good = 0;
            _ci_warn("Heap index number for key $key is zero");
            next;
        }

        my $node = $self->[HEAP][$slot];
        if (!defined $node) {
            $good = 0;
            _ci_warn("Heap contents key $key (=> $slot) are undefined");
            next;
        }

        # Element 2 of a node is its data, element 1 the key it was
        # stored under.
        $bytes += length($node->[2]);
        if ($key ne $node->[1]) {
            $good = 0;
            _ci_warn("Heap contents key $key (=> $slot) is $node->[1], should be $key");
        }
    }

    # Test BYTES
    if ($bytes != $self->[BYTES]) {
        $good = 0;
        _ci_warn("Total data in cache is $bytes, expected $self->[BYTES]");
    }

    # Test MAX
    if ($bytes > $self->[MAX]) {
        $good = 0;
        _ci_warn("Total data in cache is $bytes, exceeds maximum $self->[MAX]");
    }

    return $good;
}
# Drop this object's reference to its heap.
sub delink {
    my ($self) = @_;
    $self->[HEAP] = undef;      # Bye bye heap
}
################################################################
#
# Tie::File::Heap
#
# Heap data structure for use by cache LRU routines
package Tie::File::Heap;
use Carp ':DEFAULT', 'confess';
$Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
# Field indices within a heap node, which is stored as [SEQ, KEY, DAT]
sub SEQ () { 0 };
sub KEY () { 1 };
sub DAT () { 2 };
# Construct a heap owned by $cache.  Element 0 of the heap array is a
# header [next sequence number, parent cache, element count]; real nodes
# live at index 1 and up.  Dies unless $cache can do _heap_move, which the
# heap calls whenever a node changes position.
sub new {
    my ($pack, $cache) = @_;
    die "$pack: Parent cache object $cache does not support _heap_move method"
        unless eval { $cache->can('_heap_move') };
    my $self = [[0,$cache,0]];
    bless $self => $pack;
}
# Allocate a new sequence number, larger than all previously allocated numbers
# Hand out the current sequence counter from the header and advance it
# (post-increment, so the returned value is the one before the bump).
sub _nseq {
    my $self = shift;
    $self->[0][0]++;
}
# Accessor for header slot 1, where new() stores the parent cache object.
sub _cache {
    my ($self) = @_;
    return $self->[0][1];
}
# Accessor for header slot 2, the element count.
sub _nelts {
    my $self = shift;
    $self->[0][2];
}
# Increment the element count in header slot 2 and return the new count.
sub _nelts_inc {
    my $self = shift;
    ++$self->[0][2];
}
# Decrement the element count in header slot 2 and return the new count.
sub _nelts_dec {
    my $self = shift;
    --$self->[0][2];
}
# True when the element count reported by _nelts is zero.
sub is_empty {
    my $self = shift;
    $self->_nelts == 0;
}
# Throw away every node, keeping only the header at index 0, and reset
# the header's element count and sequence counter.  The parent cache
# reference in header slot 1 is left untouched.
sub empty {
    my $self = shift;
    $#$self = 0;                # truncate the array to just the header
    $self->[0][2] = 0;
    $self->[0][0] = 0;          # might as well reset the sequence numbers
}
# Notify the parent cache object that we moved something
# Forward the remaining arguments to the _heap_move method of the object
# returned by _cache, and return its result.
sub _heap_move {
    my $self  = shift;
    my $cache = $self->_cache;
    return $cache->_heap_move(@_);
}
# Insert a piece of data into the heap with the indicated sequence number.
# The item with the smallest sequence number is always at the top.
# If no sequence number is specified, allocate a new one and insert the
# item at the bottom.
# Build a [SEQ, KEY, DAT] node from the arguments and hand it to
# _insert_new.  When $seq is not supplied a fresh one is drawn from _nseq.
# Returns whatever _insert_new returns.
sub insert {
    my $self = shift;
    my ($key, $data, $seq) = @_;
    if (!defined $seq) {
        $seq = $self->_nseq;
    }
    return $self->_insert_new([$seq, $key, $data]);
}
# Insert a new, fresh item at the bottom of the heap
# Place $item in a free slot near the end of the heap array.  Starting
# from the array length, the candidate index is halved until the slot at
# half that index is defined; the item is stored at the resulting index.
# The parent cache is told where the item's key now lives and the element
# count is bumped.
sub _insert_new {
    my $self = shift;
    my $item = shift;
    my $slot = @$self;
    while (!defined $self->[$slot/2]) {
        $slot = int($slot/2);
    }
    $self->[$slot] = $item;
    $self->[0][1]->_heap_move($self->[$slot][KEY], $slot);
    $self->_nelts_inc;
}
# Insert [$data, $seq] pair at or below item $i in the heap.
# If $i is omitted, default to 1 (the top element.)
# Walk downward from slot $i (default 1) until an empty slot is reached
# and store the item there.  At every occupied slot on the way, if the
# resident node has a larger SEQ than the item in hand, the two are
# swapped: the item in hand takes the slot and the former resident is
# carried further down instead.  The parent cache is notified through
# _heap_move for every slot written, and the element count is incremented
# once at the end.
sub _insert {
my ($self, $item, $i) = @_;
# $self->_check_loc($i) if defined $i;
$i = 1 unless defined $i;
until (! defined $self->[$i]) {
if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
($self->[$i], $item) = ($item, $self->[$i]);
$self->[0][1]->_heap_move($self->[$i][KEY], $i);
}
# If either is undefined, go that way. Otherwise, choose at random
# (when both children are missing, the second test wins and $dir is 1)
my $dir;
$dir = 0 if !defined $self->[2*$i];
$dir = 1 if !defined $self->[2*$i+1];
$dir = int(rand(2)) unless defined $dir;
$i = 2*$i + $dir;
}
# $i now names an empty slot; $item is whichever node was displaced last
$self->[$i] = $item;
$self->[0][1]->_heap_move($self->[$i][KEY], $i);
$self->_nelts_inc;
}
# Remove the item at node $i from the heap, moving child items upwards.
# The item with the smallest sequence number is always at the top.
# Moving items upwards maintains this condition.
# Return the removed item. Return undef if there was no item at node $i.
# Take the node at index $i (default 1) out of the heap and return its
# data.  The hole left behind is filled by repeatedly pulling up the child
# with the smaller sequence number (or the only child present) until a
# leaf position is vacated.  Every node that moves is reported to the
# parent cache, as is the removal itself (with an undefined position).
# Returns nothing if there is no node at $i.
sub remove {
    my ($self, $i) = @_;
    $i = 1 if !defined $i;
    my $removed = $self->[$i];
    defined $removed or return;

    my $hole = $i;
    while (1) {
        my $left      = 2*$hole;
        my $right     = 2*$hole+1;
        my $has_left  = defined $self->[$left];
        my $has_right = defined $self->[$right];
        last if !$has_left && !$has_right;

        my $child;
        if (!$has_left) {
            $child = $right;
        }
        elsif (!$has_right) {
            $child = $left;
        }
        else {
            $child = $self->[$left][SEQ] < $self->[$right][SEQ] ? $left : $right;
        }

        $self->[$hole] = $self->[$child];   # Promote child to fill vacated spot
        $self->[0][1]->_heap_move($self->[$hole][KEY], $hole);
        $hole = $child;                     # Fill new vacated spot
    }

    $self->[0][1]->_heap_move($removed->[KEY], undef);
    undef $self->[$hole];
    $self->_nelts_dec;
    return $removed->[DAT];
}
# Remove the top node (index 1) and return whatever remove returns.
sub popheap {
    my ($self) = @_;
    return $self->remove(1);
}
# set the sequence number of the indicated item to a higher number
# than any other item in the heap, and bubble the item down to the
# bottom.
# Give node $n a freshly allocated sequence number, then sink it: while it
# has any child, swap it with the child carrying the smaller sequence
# number (or with its only child).  After each swap both affected slots
# are reported to the parent cache.
sub promote {
    my ($self, $n) = @_;
    # $self->_check_loc($n);
    my $fresh_seq = $self->_nseq;
    $self->[$n][SEQ] = $fresh_seq;

    my $pos = $n;
    while (1) {
        my $left      = 2*$pos;
        my $right     = 2*$pos+1;
        my $has_left  = defined $self->[$left];
        my $has_right = defined $self->[$right];
        last unless $has_left || $has_right;

        my $child;
        if (!$has_left) {
            $child = $right;
        }
        elsif (!$has_right) {
            $child = $left;
        }
        else {
            $child = $self->[$left][SEQ] < $self->[$right][SEQ] ? $left : $right;
        }

        ($self->[$pos], $self->[$child]) = ($self->[$child], $self->[$pos]);
        for my $moved ($pos, $child) {
            next unless defined $self->[$moved];
            $self->[0][1]->_heap_move($self->[$moved][KEY], $moved);
        }
        $pos = $child;
    }
}
# Return item $n from the heap, promoting its LRU status
# Fetch the data of node $n.  The node reference is captured before
# promote is called, since promote may move the node to another index.
sub lookup {
    my $self = shift;
    my ($n)  = @_;
    # $self->_check_loc($n);
    my $node = $self->[$n];
    $self->promote($n);
    return $node->[DAT];
}
# Assign a new value for node $n, promoting it to the bottom of the heap
# Replace the data of node $n with $val, promote the node, and return the
# data it held before.
sub set_val {
    my $self = shift;
    my ($n, $val) = @_;
    # $self->_check_loc($n);
    my $previous = $self->[$n][DAT];
    $self->[$n][DAT] = $val;
    $self->promote($n);
    return $previous;
}
# The hash key has changed for an item;
# alter the heap's record of the hash key
# Overwrite the KEY field of node $n with $new_key.
sub rekey {
    my $self = shift;
    my ($n, $new_key) = @_;
    # $self->_check_loc($n);
    return $self->[$n][KEY] = $new_key;
}
# Debugging aid: meant to confess when there is no node at index $n.
# The leading "1 ||" makes the unless-condition always true, so the
# confess below can never be reached; the check is effectively switched
# off (and every call to it elsewhere in this package is commented out).
sub _check_loc {
my ($self, $n) = @_;
unless (1 || defined $self->[$n]) {
confess "_check_loc($n) failed";
}
}
# At compile time, alias Tie::File's _ci_warn into the current package so
# that the integrity checks below can call it as a local function.
BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
# Consistency check for the heap.  Reports problems through _ci_warn and
# returns 1 if none were found, 0 otherwise.  Verifies that the header
# holds a Tie::File::Cache, that the heap ordering holds from the top
# down, that no node at index 2 or above lacks a parent, and that no two
# of those nodes share a sequence number.
sub _check_integrity {
    my ($self) = @_;
    my $good = 1;
    my %seen_at;    # sequence number => index of the node first seen with it

    if (!eval { $self->[0][1]->isa("Tie::File::Cache") }) {
        _ci_warn("Element 0 of heap corrupt");
        $good = 0;
    }
    $self->_satisfies_heap_condition(1) or $good = 0;

    for my $i (2 .. $#{$self}) {
        next unless defined $self->[$i];

        my $p = int($i/2);      # index of parent node
        if (!defined $self->[$p]) {
            _ci_warn("Element $i of heap defined, but parent $p isn't");
            $good = 0;
        }

        my $seq = $self->[$i][SEQ];
        if ($seen_at{$seq}) {
            _ci_warn("Nodes $i and $seen_at{$seq} both have SEQ=$seq");
            $good = 0;
        }
        else {
            $seen_at{$seq} = $i;
        }
    }
    return $good;
}
# Recursively verify the heap ordering below node $n (default 1, the top):
# every node must carry a strictly smaller sequence number than each of
# its children at 2*$n and 2*$n+1.  Emits one _ci_warn per violation and
# returns 1 if the whole subtree is ordered, 0 otherwise.
sub _satisfies_heap_condition {
    my $self = shift;
    my $n = shift || 1;
    my $good = 1;
    for my $side (0, 1) {
        my $c = $n*2 + $side;
        next unless defined $self->[$c];
        # Compare sequence number with sequence number.  Comparing with the
        # child node itself would use the array reference's numeric value
        # (its address), so a violation would almost never be reported.
        if ($self->[$n][SEQ] >= $self->[$c][SEQ]) {
            _ci_warn("Node $n of heap does not predate node $c");
            $good = 0;
        }
        $good = 0 unless $self->_satisfies_heap_condition($c);
    }
    return $good;
}
# Return a list of all the keys, sorted by expiration order
# Collect every node via _nodes, order them by ascending sequence number,
# and return the KEY field of each.
sub expire_order {
    my ($self) = @_;
    return map  { $_->[KEY] }
           sort { $a->[SEQ] <=> $b->[SEQ] }
           $self->_nodes;
}
# Return the node at index $i (default 1) followed by all nodes reachable
# through its children at 2*$i and 2*$i+1, parent first.  An undefined
# slot contributes nothing and ends the descent along that branch.
sub _nodes {
    my ($self, $i) = @_;
    $i ||= 1;
    return if !defined $self->[$i];
    return ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1));
}
"Cogito, ergo sum."; # don't forget to return a true value from the file
__END__
=head1 NAME
Tie::File - Access the lines of a disk file via a Perl array
=head1 SYNOPSIS
# This file documents Tie::File version 0.97
use Tie::File;
tie @array, 'Tie::File', filename or die ...;
$array[13] = 'blah'; # line 13 of the file is now 'blah'
print $array[42]; # display line 42 of the file
$n_recs = @array; # how many records are in the file?
$#array -= 2; # chop two records off the end
for (@array) {
s/PERL/Perl/g; # Replace PERL with Perl everywhere in the file
}
# These are just like regular push, pop, unshift, shift, and splice
# Except that they modify the file in the way you would expect
push @array, new recs...;
my $r1 = pop @array;
unshift @array, new recs...;
my $r2 = shift @array;
@old_recs = splice @array, 3, 7, new recs...;
untie @array; # all finished
=head1 DESCRIPTION
C<Tie::File> represents a regular text file as a Perl array. Each
element in the array corresponds to a record in the file. The first
line of the file is element 0 of the array; the second line is element
1, and so on.
The file is I<not> loaded into memory, so this will work even for
gigantic files.
Changes to the array are reflected in the file immediately.
Lazy people and beginners may now stop reading the manual.
=head2 C<recsep>
What is a 'record'? By default, the meaning is the same as for the
C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
probably C<"\n">. (Minor exception: on DOS and Win32 systems, a
'record' is a string terminated by C<"\r\n">.) You may change the
definition of "record" by supplying the C<recsep> option in the C<tie>
call:
tie @array, 'Tie::File', $file, recsep => 'es';
This says that records are delimited by the string C<es>. If the file
contained the following data:
Curse these pesky flies!\n
then the C<@array> would appear to have four elements:
"Curse th"
"e p"
"ky fli"
"!\n"
An undefined value is not permitted as a record separator. Perl's
special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
emulated.
Records read from the tied array do not have the record separator
string on the end; this is to allow
$array[17] .= "extra";
to work as expected.
(See L<"autochomp">, below.) Records stored into the array will have
the record separator string appended before they are written to the
file, if they don't have one already. For example, if the record
separator string is C<"\n">, then the following two lines do exactly
the same thing:
$array[17] = "Cherry pie";
$array[17] = "Cherry pie\n";
The result is that the contents of line 17 of the file will be
replaced with "Cherry pie"; a newline character will separate line 17
from line 18. This means that this code will do nothing:
chomp $array[17];
Because the C<chomp>ed value will have the separator reattached when
it is written back to the file. There is no way to create a file
whose trailing record separator string is missing.
Inserting records that I<contain> the record separator string is not
supported by this module. It will probably produce a reasonable
result, but what this result will be may change in a future version.
Use 'splice' to insert records or to replace one record with several.
=head2 C<autochomp>
Normally, array elements have the record separator removed, so that if
the file contains the text
Gold
Frankincense
Myrrh
the tied array will appear to contain C<("Gold", "Frankincense",
"Myrrh")>. If you set C<autochomp> to a false value, the record
separator will not be removed. If the file above was tied with
tie @gifts, "Tie::File", $gifts, autochomp => 0;
then the array C<@gifts> would appear to contain C<("Gold\n",
"Frankincense\n", "Myrrh\n")>, or (on Win32 systems) C<("Gold\r\n",
"Frankincense\r\n", "Myrrh\r\n")>.
=head2 C<mode>
Normally, the specified file will be opened for read and write access,
and will be created if it does not exist. (That is, the flags
C<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to
change this, you may supply alternative flags in the C<mode> option.
See L<Fcntl> for a listing of available flags.
For example:
# open the file if it exists, but fail if it does not exist
use Fcntl 'O_RDWR';
tie @array, 'Tie::File', $file, mode => O_RDWR;
# create the file if it does not exist
use Fcntl 'O_RDWR', 'O_CREAT';
tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
# open an existing file in read-only mode
use Fcntl 'O_RDONLY';
tie @array, 'Tie::File', $file, mode => O_RDONLY;
Opening the data file in write-only or append mode is not supported.
=head2 C<memory>
This is an upper limit on the amount of memory that C<Tie::File> will
consume at any time while managing the file. This is used for two
things: managing the I<read cache> and managing the I<deferred write
buffer>.
Records read in from the file are cached, to avoid having to re-read
them repeatedly. If you read the same record twice, the first time it
will be stored in memory, and the second time it will be fetched from
the I<read cache>. The amount of data in the read cache will not
exceed the value you specified for C<memory>. If C<Tie::File> wants
to cache a new record, but the read cache is full, it will make room
by expiring the least-recently visited records from the read cache.
The default memory limit is 2MiB. You can adjust the maximum read
cache size by supplying the C<memory> option. The argument is the
desired cache size, in bytes.
# I have a lot of memory, so use a large cache to speed up access
tie @array, 'Tie::File', $file, memory => 20_000_000;
Setting the memory limit to 0 will inhibit caching; records will be
fetched from disk every time you examine them.
The C<memory> value is not an absolute or exact limit on the memory
used. C<Tie::File> objects contain some structures besides the read
cache and the deferred write buffer, whose sizes are not charged
against C<memory>.
The cache itself consumes about 310 bytes per cached record, so if
your file has many short records, you may want to decrease the cache
memory limit, or else the cache overhead may exceed the size of the
cached data.
=head2 C<dw_size>
(This is an advanced feature. Skip this section on first reading.)
If you use deferred writing (See L<"Deferred Writing">, below) then
data you write into the array will not be written directly to the
file; instead, it will be saved in the I<deferred write buffer> to be
written out later. Data in the deferred write buffer is also charged
against the memory limit you set with the C<memory> option.
You may set the C<dw_size> option to limit the amount of data that can
be saved in the deferred write buffer. This limit may not exceed the
total memory limit. For example, if you set C<dw_size> to 1000 and
C<memory> to 2500, that means that no more than 1000 bytes of deferred
writes will be saved up. The space available for the read cache will
vary, but it will always be at least 1500 bytes (if the deferred write
buffer is full) and it could grow as large as 2500 bytes (if the
deferred write buffer is empty.)
If you don't specify a C<dw_size>, it defaults to the entire memory
limit.
=head2 Option Format
C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
C<recsep>. C<-memory> is a synonym for C<memory>. You get the
idea.
=head1 Public Methods
The C<tie> call returns an object, say C<$o>. You may call
$rec = $o->FETCH($n);
$o->STORE($n, $rec);
to fetch or store the record at line C<$n>, respectively; similarly
the other tied array methods. (See L<perltie> for details.) You may
also call the following methods on this object:
=head2 C<flock>
$o->flock(MODE)
will lock the tied file. C<MODE> has the same meaning as the second
argument to the Perl built-in C<flock> function; for example
C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
the C<use Fcntl ':flock'> declaration.)
C<MODE> is optional; the default is C<LOCK_EX>.
C<Tie::File> maintains an internal table of the byte offset of each
record it has seen in the file.
When you use C<flock> to lock the file, C<Tie::File> assumes that the
read cache is no longer trustworthy, because another process might
have modified the file since the last time it was read. Therefore, a
successful call to C<flock> discards the contents of the read cache
and the internal record offset table.
C<Tie::File> promises that the following sequence of operations will
be safe:
my $o = tie @array, "Tie::File", $filename;
$o->flock;
In particular, C<Tie::File> will I<not> read or write the file during
the C<tie> call. (Exception: Using C<mode =E<gt> O_TRUNC> will, of
course, erase the file during the C<tie> call. If you want to do this
safely, then open the file without C<O_TRUNC>, lock the file, and use
C<@array = ()>.)
The best way to unlock a file is to discard the object and untie the
array. It is probably unsafe to unlock the file without also untying
it, because if you do, changes may remain unwritten inside the object.
That is why there is no shortcut for unlocking. If you really want to
unlock the file prematurely, you know what to do; if you don't know
what to do, then don't do it.
All the usual warnings about file locking apply here. In particular,
note that file locking in Perl is B<advisory>, which means that
holding a lock will not prevent anyone else from reading, writing, or
erasing the file; it only prevents them from getting another lock at
the same time. Locks are analogous to green traffic lights: If you
have a green light, that does not prevent the idiot coming the other
way from plowing into you sideways; it merely guarantees to you that
the idiot does not also have a green light at the same time.
=head2 C<autochomp>
my $old_value = $o->autochomp(0); # disable autochomp option
my $old_value = $o->autochomp(1); # enable autochomp option
my $ac = $o->autochomp(); # recover current value
See L<"autochomp">, above.
=head2 C<defer>, C<flush>, C<discard>, and C<autodefer>
See L<"Deferred Writing">, below.
=head2 C<offset>
$off = $o->offset($n);
This method returns the byte offset of the start of the C<$n>th record
in the file. If there is no such record, it returns an undefined
value.
=head1 Tying to an already-opened filehandle
If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
of the other C<IO> modules, you may use:
tie @array, 'Tie::File', $fh, ...;
Similarly if you opened that handle C<FH> with regular C<open> or
C<sysopen>, you may use:
tie @array, 'Tie::File', \*FH, ...;
Handles that were opened write-only won't work. Handles that were
opened read-only will work as long as you don't try to modify the
array. Handles must be attached to seekable sources of data---that
means no pipes or sockets. If C<Tie::File> can detect that you
supplied a non-seekable handle, the C<tie> call will throw an
exception. (On Unix systems, it can detect this.)
Note that Tie::File will only close any filehandles that it opened
internally. If you passed it a filehandle as above, you "own" the
filehandle, and are responsible for closing it after you have untied
the @array.
=head1 Deferred Writing
(This is an advanced feature. Skip this section on first reading.)
Normally, modifying a C<Tie::File> array writes to the underlying file
immediately. Every assignment like C<$a[3] = ...> rewrites as much of
the file as is necessary; typically, everything from line 3 through
the end will need to be rewritten. This is the simplest and most
transparent behavior. Performance even for large files is reasonably
good.
However, under some circumstances, this behavior may be excessively
slow. For example, suppose you have a million-record file, and you
want to do:
for (@FILE) {
$_ = "> $_";
}
The first time through the loop, you will rewrite the entire file,
from line 0 through the end. The second time through the loop, you
will rewrite the entire file from line 1 through the end. The third
time through the loop, you will rewrite the entire file from line 2 to
the end. And so on.
If the performance in such cases is unacceptable, you may defer the
actual writing, and then have it done all at once. The following loop
will perform much better for large files:
(tied @a)->defer;
for (@a) {
$_ = "> $_";
}
(tied @a)->flush;
If C<Tie::File>'s memory limit is large enough, all the writing will
be done in memory. Then, when you call C<-E<gt>flush>, the entire file
will be rewritten in a single pass.
(Actually, the preceding discussion is something of a fib. You don't
need to enable deferred writing to get good performance for this
common case, because C<Tie::File> will do it for you automatically
unless you specifically tell it not to. See L<"Autodeferring">,
below.)
Calling C<-E<gt>flush> returns the array to immediate-write mode. If
you wish to discard the deferred writes, you may call C<-E<gt>discard>
instead of C<-E<gt>flush>. Note that in some cases, some of the data
will have been written already, and it will be too late for
C<-E<gt>discard> to discard all the changes. Support for
C<-E<gt>discard> may be withdrawn in a future version of C<Tie::File>.
Deferred writes are cached in memory up to the limit specified by the
C<dw_size> option (see above). If the deferred-write buffer is full
and you try to write still more deferred data, the buffer will be
flushed. All buffered data will be written immediately, the buffer
will be emptied, and the now-empty space will be used for future
deferred writes.
If the deferred-write buffer isn't yet full, but the total size of the
buffer and the read cache would exceed the C<memory> limit, the oldest
records will be expired from the read cache until the total size is
under the limit.
C<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be
deferred. When you perform one of these operations, any deferred data
is written to the file and the operation is performed immediately.
This may change in a future version.
If you resize the array with deferred writing enabled, the file will
be resized immediately, but deferred records will not be written.
This has a surprising consequence: C<@a = (...)> erases the file
immediately, but the writing of the actual data is deferred. This
might be a bug. If it is a bug, it will be fixed in a future version.
=head2 Autodeferring
C<Tie::File> tries to guess when deferred writing might be helpful,
and to turn it on and off automatically.
for (@a) {
$_ = "> $_";
}
In this example, only the first two assignments will be done
immediately; after this, all the changes to the file will be deferred
up to the user-specified memory limit.
You should usually be able to ignore this and just use the module
without thinking about deferring. However, special applications may
require fine control over which writes are deferred, or may require
that all writes be immediate. To disable the autodeferment feature,
use
(tied @o)->autodefer(0);
or
tie @array, 'Tie::File', $file, autodefer => 0;
Similarly, C<-E<gt>autodefer(1)> re-enables autodeferment, and
C<-E<gt>autodefer()> recovers the current value of the autodefer setting.
=head1 CONCURRENT ACCESS TO FILES
Caching and deferred writing are inappropriate if you want the same
file to be accessed simultaneously from more than one process. Other
optimizations performed internally by this module are also
incompatible with concurrent access. A future version of this module will
support a C<concurrent =E<gt> 1> option that enables safe concurrent access.
Previous versions of this documentation suggested using C<memory
=E<gt> 0> for safe concurrent access. This was mistaken. Tie::File
will not support safe concurrent access before version 0.98.
=head1 CAVEATS
(That's Latin for 'warnings'.)
=over 4
=item *
Reasonable effort was made to make this module efficient. Nevertheless,
changing the size of a record in the middle of a large file will
always be fairly slow, because everything after the new record must be
moved.
=item *
The behavior of tied arrays is not precisely the same as for regular
arrays. For example:
# This DOES print "How unusual!"
undef $a[10]; print "How unusual!\n" if defined $a[10];
C<undef>-ing a C<Tie::File> array element just blanks out the
corresponding record in the file. When you read it back again, you'll
get the empty string, so the supposedly-C<undef>'ed value will be
defined. Similarly, if you have C<autochomp> disabled, then
# This DOES print "How unusual!" if 'autochomp' is disabled
undef $a[10];
print "How unusual!\n" if $a[10];
Because when C<autochomp> is disabled, C<$a[10]> will read back as
C<"\n"> (or whatever the record separator string is.)
There are other minor differences, particularly regarding C<exists>
and C<delete>, but in general, the correspondence is extremely close.
=item *
I have supposed that since this module is concerned with file I/O,
almost all normal use of it will be heavily I/O bound. This means
that the time to maintain complicated data structures inside the
module will be dominated by the time to actually perform the I/O.
When there was an opportunity to spend CPU time to avoid doing I/O, I
usually tried to take it.
=item *
You might be tempted to think that deferred writing is like
transactions, with C<flush> as C<commit> and C<discard> as
C<rollback>, but it isn't, so don't.
=item *
There is a large memory overhead for each record offset and for each
cache entry: about 310 bytes per cached data record, and about 21 bytes
per offset table entry.
The per-record overhead will limit the maximum number of records you
can access per file. Note that I<accessing> the length of the array
via C<$x = scalar @tied_file> accesses B<all> records and stores their
offsets. The same for C<foreach (@tied_file)>, even if you exit the
loop early.
=back
=head1 SUBCLASSING
This version promises absolutely nothing about the internals, which
may change without notice. A future version of the module will have a
well-defined and stable subclassing API.
=head1 WHAT ABOUT C<DB_File>?
People sometimes point out that L<DB_File> will do something similar,
and ask why the C<Tie::File> module is necessary.
There are a number of reasons that you might prefer C<Tie::File>.
A list is available at C<http://perl.plover.com/TieFile/why-not-DB_File>.
=head1 AUTHOR
Mark Jason Dominus
To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
To receive an announcement whenever a new version of this module is
released, send a blank email message to
C<mjd-perl-tiefile-subscribe@plover.com>.
The most recent version of this module, including documentation and
any news of importance, will be available at
http://perl.plover.com/TieFile/
=head1 LICENSE
C<Tie::File> version 0.97 is copyright (C) 2003 Mark Jason Dominus.
This library is free software; you may redistribute it and/or modify
it under the same terms as Perl itself.
These terms are your choice of any of (1) the Perl Artistic Licence,
or (2) version 2 of the GNU General Public License as published by the
Free Software Foundation, or (3) any later version of the GNU General
Public License.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this library program; it should be in the file C<COPYING>.
If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
Fifth Floor, Boston, MA 02110-1301, USA
For licensing inquiries, contact the author at:
Mark Jason Dominus
255 S. Warnock St.
Philadelphia, PA 19107
=head1 WARRANTY
C<Tie::File> version 0.97 comes with ABSOLUTELY NO WARRANTY.
For details, see the license.
=head1 THANKS
Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
core when I hadn't written it yet, and for generally being helpful,
supportive, and competent. (Usually the rule is "choose any one.")
Also big thanks to Abhijit Menon-Sen for all of the same things.
Special thanks to Craig Berry and Peter Prymmer (for VMS portability
help), Randy Kobes (for Win32 portability help), Clinton Pierce and
Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond
the call of duty), Michael G Schwern (for testing advice), and the
rest of the CPAN testers (for testing generally).
Special thanks to Tels for suggesting several speed and memory
optimizations.
Additional thanks to:
Edward Avis /
Mattia Barbon /
Tom Christiansen /
Gerrit Haase /
Gurusamy Sarathy /
Jarkko Hietaniemi (again) /
Nikola Knezevic /
John Kominetz /
Nick Ing-Simmons /
Tassilo von Parseval /
H. Dieter Pearcey /
Slaven Rezic /
Eric Roode /
Peter Scott /
Peter Somu /
Autrijus Tang (again) /
Tels (again) /
Juerd Waalboer
=head1 TODO
More tests. (Stuff I didn't think of yet.)
Paragraph mode?
Fixed-length mode. Leave-blanks mode.
Maybe an autolocking mode?
For many common uses of the module, the read cache is a liability.
For example, a program that inserts a single record, or that scans the
file once, will have a cache hit rate of zero. This suggests a major
optimization: The cache should be initially disabled. Here's a hybrid
approach: Initially, the cache is disabled, but the cache code
maintains statistics about how high the hit rate would be *if* it were
enabled. When it sees the hit rate get high enough, it enables
itself. The STAT comments in this code are the beginning of an
implementation of this.
Record locking with fcntl()? Then the module might support an undo
log and get real transactions. What a tour de force that would be.
Keeping track of the highest cached record. This would allow reads-in-a-row
to skip the cache lookup faster (if reading from 1..N with empty cache at
start, the last cached value will be always N-1).
More tests.
=cut
package Tie::Scalar;
our $VERSION = '1.01';
=head1 NAME
Tie::Scalar, Tie::StdScalar - base class definitions for tied scalars
=head1 SYNOPSIS
package NewScalar;
require Tie::Scalar;
@ISA = qw(Tie::Scalar);
sub FETCH { ... } # Provide a needed method
sub TIESCALAR { ... } # Overrides inherited method
package NewStdScalar;
require Tie::Scalar;
@ISA = qw(Tie::StdScalar);
# All methods provided by default, so define only what needs be overridden
sub FETCH { ... }
package main;
tie $new_scalar, 'NewScalar';
tie $new_std_scalar, 'NewStdScalar';
=head1 DESCRIPTION
This module provides some skeletal methods for scalar-tying classes. See
L<perltie> for a list of the functions required in tying a scalar to a
package. The basic B<Tie::Scalar> package provides a C<new> method, as well
as methods C<TIESCALAR>, C<FETCH> and C<STORE>. The B<Tie::StdScalar>
package provides all the methods specified in L<perltie>. It inherits from
B<Tie::Scalar> and causes scalars tied to it to behave exactly like the
built-in scalars, allowing for selective overloading of methods. The C<new>
method is provided as a means of grandfathering, for classes that forget to
provide their own C<TIESCALAR> method.
For developers wishing to write their own tied-scalar classes, the methods
are summarized below. The L<perltie> section not only documents these, but
has sample code as well:
=over 4
=item TIESCALAR classname, LIST
The method invoked by the command C<tie $scalar, classname>. Associates a new
scalar instance with the specified class. C<LIST> would represent additional
arguments (along the lines of L<AnyDBM_File> and compatriots) needed to
complete the association.
=item FETCH this
Retrieve the value of the tied scalar referenced by I<this>.
=item STORE this, value
Store data I<value> in the tied scalar referenced by I<this>.
=item DESTROY this
Free the storage associated with the tied scalar referenced by I<this>.
This is rarely needed, as Perl manages its memory quite well. But the
option exists, should a class wish to perform specific actions upon the
destruction of an instance.
=back
=head1 MORE INFORMATION
The L<perltie> section uses a good example of tying scalars by associating
process IDs with priority.
=cut
use Carp;
use warnings::register;
# Legacy constructor. Forwards every argument to the class's TIESCALAR
# method and returns whatever that method returns.
sub new {
    my $class = shift;
    return $class->TIESCALAR(@_);
}
# "Grandfather" the new, a la Tie::Hash
#
# Fallback TIESCALAR, reached when a subclass does not define its own.
# If the subclass supplies its own new(), a warning is issued and the call is
# delegated to it. If the only new() visible is the one defined in this
# package (which itself calls TIESCALAR), delegating would recurse forever,
# so we croak instead. Croaks as well when no new() exists at all or when
# called on this base package directly.
sub TIESCALAR {
    my $pkg     = shift;
    my $pkg_new = $pkg->can('new');

    if ($pkg_new and $pkg ne __PACKAGE__) {
        my $my_new = __PACKAGE__->can('new');

        # Prevent recursion: the inherited new() just calls TIESCALAR again.
        if (defined $my_new and $pkg_new == $my_new) {
            croak "$pkg must define either a TIESCALAR() or a new() method";
        }

        warnings::warnif("WARNING: calling ${pkg}->new since "
                       . "${pkg}->TIESCALAR is missing");
        $pkg->new(@_);
    }
    else {
        croak "$pkg doesn't define a TIESCALAR method";
    }
}
# Placeholder FETCH: always croaks, naming the class of the object it was
# called on.
sub FETCH {
    my ($self) = @_;
    my $class = ref $self;
    croak "$class doesn't define a FETCH method";
}
# Placeholder STORE: always croaks, naming the class of the object it was
# called on.
sub STORE {
    my ($self) = @_;
    my $class = ref $self;
    croak "$class doesn't define a STORE method";
}
#
# The Tie::StdScalar package provides scalars that behave exactly like
# Perl's built-in scalars. Good base to inherit from, if you're only going to
# tweak a small bit.
#
package Tie::StdScalar;
@ISA = qw(Tie::Scalar);
# Constructor for a standard tied scalar.
#   $class   - class to bless the new object into
#   optional - initial value of the scalar
# Returns a blessed reference to a fresh scalar holding the initial value.
# The initial value is kept whenever one is passed, including false values
# such as 0 or the empty string; it is undef only when none is passed.
sub TIESCALAR {
    my $class = shift;
    my $instance = @_ ? shift : undef;
    return bless \$instance => $class;
}
# Return the value held by the scalar this object refers to.
sub FETCH {
    my ($self) = @_;
    return $$self;
}
# Assign the given value to the scalar this object refers to and return it.
sub STORE {
    my ($self, $value) = @_;
    return $$self = $value;
}
# Clear the scalar this object refers to when the object is destroyed.
sub DESTROY {
    my ($self) = @_;
    undef $$self;
}
1;
package Tie::Handle;
use 5.006_001;
our $VERSION = '4.2';
# Tie::StdHandle used to be inside Tie::Handle. For backwards compatibility
# loading Tie::Handle has to make Tie::StdHandle available.
use Tie::StdHandle;
=head1 NAME
Tie::Handle - base class definitions for tied handles
=head1 SYNOPSIS
package NewHandle;
require Tie::Handle;
@ISA = qw(Tie::Handle);
sub READ { ... } # Provide a needed method
sub TIEHANDLE { ... } # Overrides inherited method
package main;
tie *FH, 'NewHandle';
=head1 DESCRIPTION
This module provides some skeletal methods for handle-tying classes. See
L<perltie> for a list of the functions required in tying a handle to a package.
The basic B<Tie::Handle> package provides a C<new> method, as well as methods
C<TIEHANDLE>, C<PRINT>, C<PRINTF> and C<GETC>.
For developers wishing to write their own tied-handle classes, the methods
are summarized below. The L<perltie> section not only documents these, but
has sample code as well:
=over 4
=item TIEHANDLE classname, LIST
The method invoked by the command C<tie *glob, classname>. Associates a new
glob instance with the specified class. C<LIST> would represent additional
arguments (along the lines of L<AnyDBM_File> and compatriots) needed to
complete the association.
=item WRITE this, scalar, length, offset
Write I<length> bytes of data from I<scalar> starting at I<offset>.
=item PRINT this, LIST
Print the values in I<LIST>
=item PRINTF this, format, LIST
Print the values in I<LIST> using I<format>
=item READ this, scalar, length, offset
Read I<length> bytes of data into I<scalar> starting at I<offset>.
=item READLINE this
Read a single line
=item GETC this
Get a single character
=item CLOSE this
Close the handle
=item OPEN this, filename
(Re-)open the handle
=item BINMODE this
Specify content is binary
=item EOF this
Test for end of file.
=item TELL this
Return position in the file.
=item SEEK this, offset, whence
Position the file at I<offset>, interpreted according to I<whence>.
=item DESTROY this
Free the storage associated with the tied handle referenced by I<this>.
This is rarely needed, as Perl manages its memory quite well. But the
option exists, should a class wish to perform specific actions upon the
destruction of an instance.
=back
=head1 MORE INFORMATION
The L<perltie> section contains an example of tying handles.
=head1 COMPATIBILITY
This version of Tie::Handle is neither related to nor compatible with
the Tie::Handle (3.0) module available on CPAN. It was due to an
accident that two modules with the same name appeared. The namespace
clash has been cleared in favor of this module that comes with the
perl core in September 2000 and accordingly the version number has
been bumped up to 4.0.
=cut
use Carp;
use warnings::register;
# Legacy constructor. Forwards every argument to the class's TIEHANDLE
# method and returns whatever that method returns.
sub new {
    my $class = shift;
    return $class->TIEHANDLE(@_);
}
# "Grandfather" the new, a la Tie::Hash
#
# Fallback TIEHANDLE, reached when a subclass does not define its own.
# If the subclass defines new() in its own package, a warning is issued and
# the call is delegated to it; otherwise we croak. The check is skipped for
# this base package itself, because its new() calls TIEHANDLE and delegating
# would recurse forever.
sub TIEHANDLE {
    my $pkg = shift;
    no strict 'refs';    # symbolic lookup of the subclass's own new()
    if ($pkg ne __PACKAGE__ and defined &{"${pkg}::new"}) {
        warnings::warnif("WARNING: calling ${pkg}->new since "
                       . "${pkg}->TIEHANDLE is missing");
        $pkg->new(@_);
    }
    else {
        croak "$pkg doesn't define a TIEHANDLE method";
    }
}
# Default PRINT built on top of WRITE.
# Joins the arguments with $, (when defined), appends $\ (when defined) and
# hands the resulting string, its length and offset 0 to the object's WRITE
# method. Croaks when the object's WRITE is the placeholder from this package.
sub PRINT {
    my $self = shift;
    croak ref($self), " doesn't define a PRINT method"
        if $self->can('WRITE') == \&WRITE;
    my $separator = defined $, ? $, : "";
    my $text = join($separator, @_);
    $text .= $\ if defined $\;
    return $self->WRITE($text, length($text), 0);
}
# Default PRINTF built on top of WRITE.
# Formats the remaining arguments with sprintf, using the first of them as
# the format, and passes the string, its length and offset 0 to the object's
# WRITE method. Croaks when the object's WRITE is the placeholder from this
# package.
sub PRINTF {
    my $self = shift;
    croak ref($self), " doesn't define a PRINTF method"
        if $self->can('WRITE') == \&WRITE;
    my $format = shift;
    my $text = sprintf($format, @_);
    return $self->WRITE($text, length($text), 0);
}
# Placeholder READLINE: always croaks, naming the class of the object.
sub READLINE {
    my ($self) = @_;
    my $class = ref $self;
    croak "$class doesn't define a READLINE method";
}
# Default GETC built on top of READ.
# Asks the object's READ method for one character into a fresh buffer and
# returns that buffer. Croaks when the object's READ is the placeholder from
# this package.
sub GETC {
    my $self = shift;
    croak ref($self), " doesn't define a GETC method"
        if $self->can('READ') == \&READ;
    my $char;
    $self->READ($char, 1);
    return $char;
}
# Placeholder READ: always croaks, naming the class of the object.
sub READ {
    my ($self) = @_;
    my $class = ref $self;
    croak "$class doesn't define a READ method";
}
# Placeholder WRITE: always croaks, naming the class of the object.
sub WRITE {
    my ($self) = @_;
    my $class = ref $self;
    croak "$class doesn't define a WRITE method";
}
# Placeholder CLOSE: always croaks, naming the class of the object.
sub CLOSE {
    my ($self) = @_;
    my $class = ref $self;
    croak "$class doesn't define a CLOSE method";
}
1;
use strict;
package Tie::Memoize;
use Tie::Hash;
our @ISA = 'Tie::ExtraHash';
our $VERSION = '1.1';
our $exists_token = \undef;
# Local croak(): loads Carp only when an error is actually raised, then
# transfers control to Carp::croak with the current @_ via goto.
sub croak {require Carp; goto &Carp::croak}
# Format: [0: STORAGE, 1: EXISTS-CACHE, 2: FETCH_function;
# 3: EXISTS_function, 4: DATA, 5: EXISTS_different ]
# Fetch the value for a key, loading it on first access.
# Object slots used: 0 value store, 1 existence cache, 2 fetch function,
# 4 user data passed to that function.
# Returns the stored value when the key is already present (even if undef),
# nothing when the key is cached as non-existent or the fetch function
# returns an empty list, and otherwise the freshly loaded value, which is
# also saved in the value store.
sub FETCH {
    my ($self, $key) = @_;
    my $store = $self->[0];
    my $value = $store->{$key};
    return $value if defined $value || exists $store->{$key};

    my $known = $self->[1]{$key};
    return if defined $known && !$known;    # known not to exist

    my @loaded = $self->[2]->($key, $self->[4]);
    unless (@loaded) {
        $self->[1]{$key} = 0;               # remember the miss
        return;
    }
    delete $self->[1]{$key};                # existence cache no longer needed
    return $self->[0]{$key} = $loaded[0];
}
# Report whether a key exists, consulting the exists function when neither
# the value store (slot 0) nor the existence cache (slot 1) can answer.
# Slot 3 is the exists function, slot 4 the user data passed to it, and
# slot 5 is true when that function only reports existence rather than
# returning the value.
sub EXISTS {
    my $self = shift;
    my $key  = shift;
    return 1 if exists $self->[0]{$key};

    my $known = $self->[1]{$key};
    return $known if defined $known;

    my @found = $self->[3]($key, $self->[4]);
    if (!@found) {
        $self->[1]{$key} = 0;               # remember the miss
        return;
    }
    if ($self->[5]) {
        return ($self->[1]{$key} = 1);      # existence only
    }
    $self->[0]{$key} = $found[0];           # the function returned the value
    return 1;
}
# Constructor: tie %hash, 'Tie::Memoize', \&fetch, $data, \&exists,
#                  \%initial_values, \%initial_existence;
# Only the fetch function is required. Missing arguments default to undef
# for $data, the fetch function for the exists function, and empty hashes
# for the two caches. Croaks on too few or too many arguments.
# The object is an array: [values, existence cache, fetch function,
# exists function, data, flag that is true when the two functions differ].
sub TIEHASH {
  croak q{syntax: tie %hash, 'Tie::Memoize', \&fetch_subr} if @_ < 2;
  croak q{syntax: tie %hash, 'Tie::Memoize', \&fetch_subr, $data, }
      . q{\&exists_subr, \%data_cache, \%existence_cache} if @_ > 6;
  push @_, undef if @_ < 3;       # Data
  push @_, $_[1] if @_ < 4;       # exists
  push @_, {} while @_ < 6;       # initial value and caches
  bless [ @_[4,5,1,3,2], $_[1] ne $_[3]], $_[0]
}
1;
=head1 NAME
Tie::Memoize - add data to hash when needed
=head1 SYNOPSIS
require Tie::Memoize;
tie %hash, 'Tie::Memoize',
\&fetch, # The rest is optional
$DATA, \&exists,
{%ini_value}, {%ini_existence};
=head1 DESCRIPTION
This package allows a tied hash to autoload its values on the first access,
and to use the cached value on the following accesses.
Only read-accesses (via fetching the value or C<exists>) result in calls to
the functions; the modify-accesses are performed as on a normal hash.
The required arguments during C<tie> are the hash, the package, and
the reference to the C<FETCH>ing function. The optional arguments are
an arbitrary scalar $data, the reference to the C<EXISTS> function,
and initial values of the hash and of the existence cache.
Both the C<FETCH>ing function and the C<EXISTS> functions have the
same signature: the arguments are C<$key, $data>; $data is the same
value as given as argument during tie()ing. Both functions should
return an empty list if the value does not exist. If C<EXISTS>
function is different from the C<FETCH>ing function, it should return
a TRUE value on success. The C<FETCH>ing function should return the
intended value if the key is valid.
=head1 Inheriting from B<Tie::Memoize>
The structure of the tied() data is an array reference with elements
0: cache of known values
1: cache of known existence of keys
2: FETCH function
3: EXISTS function
4: $data
The rest is for internal usage of this package. In particular, if
TIEHASH is overwritten, it should call SUPER::TIEHASH.
=head1 EXAMPLE
sub slurp {
my ($key, $dir) = @_;
open my $h, '<', "$dir/$key" or return;
local $/; <$h> # slurp it all
}
sub exists { my ($key, $dir) = @_; return -f "$dir/$key" }
tie %hash, 'Tie::Memoize', \&slurp, $directory, \&exists,
{ fake_file1 => $content1, fake_file2 => $content2 },
{ pretend_does_not_exists => 0, known_to_exist => 1 };
This example treats the slightly modified contents of $directory as a
hash. The modifications are that the keys F<fake_file1> and
F<fake_file2> fetch values $content1 and $content2, and
F<pretend_does_not_exists> will never be accessed. Additionally, the
existence of F<known_to_exist> is never checked (so if it does not
exist when its content is needed, the user of %hash may be confused).
=head1 BUGS
FIRSTKEY and NEXTKEY methods go through the keys which were already read,
not all the possible keys of the hash.
=head1 AUTHOR
Ilya Zakharevich L<mailto:perl-module-hash-memoize@ilyaz.org>.
=cut
package Tie::RefHash;
use vars qw/$VERSION/;
$VERSION = "1.38";
use 5.005;
=head1 NAME
Tie::RefHash - use references as hash keys
=head1 SYNOPSIS
require 5.004;
use Tie::RefHash;
tie HASHVARIABLE, 'Tie::RefHash', LIST;
tie HASHVARIABLE, 'Tie::RefHash::Nestable', LIST;
untie HASHVARIABLE;
=head1 DESCRIPTION
This module provides the ability to use references as hash keys if you
first C<tie> the hash variable to this module. Normally, only the
keys of the tied hash itself are preserved as references; to use
references as keys in hashes-of-hashes, use Tie::RefHash::Nestable,
included as part of Tie::RefHash.
It is implemented using the standard perl TIEHASH interface. Please
see the C<tie> entry in perlfunc(1) and perltie(1) for more information.
The Nestable version works by looking for hash references being stored
and converting them to tied hashes so that they too can have
references as keys. This will happen without warning whenever you
store a reference to one of your own hashes in the tied hash.
=head1 EXAMPLE
use Tie::RefHash;
tie %h, 'Tie::RefHash';
$a = [];
$b = {};
$c = \*main;
$d = \"gunk";
$e = sub { 'foo' };
%h = ($a => 1, $b => 2, $c => 3, $d => 4, $e => 5);
$a->[0] = 'foo';
$b->{foo} = 'bar';
for (keys %h) {
print ref($_), "\n";
}
tie %h, 'Tie::RefHash::Nestable';
$h{$a}->{$b} = 1;
for (keys %h, keys %{$h{$a}}) {
print ref($_), "\n";
}
=head1 THREAD SUPPORT
L<Tie::RefHash> fully supports threading using the C<CLONE> method.
=head1 STORABLE SUPPORT
L<Storable> hooks are provided for semantically correct serialization and
cloning of tied refhashes.
=head1 RELIC SUPPORT
This version of Tie::RefHash seems to no longer work with 5.004. This has not
been thoroughly investigated. Patches welcome ;-)
=head1 MAINTAINER
Yuval Kogman E<lt>nothingmuch@woobling.orgE<gt>
=head1 AUTHOR
Gurusamy Sarathy gsar@activestate.com
'Nestable' by Ed Avis ed@membled.com
=head1 SEE ALSO
perl(1), perlfunc(1), perltie(1)
=cut
use Tie::Hash;
use vars '@ISA';
@ISA = qw(Tie::Hash);
use strict;
use Carp qw/croak/;
# Compile-time feature detection. Installs three constant subs:
#   _HAS_THREADS     - true when %Config reports usethreads
#   _HAS_SCALAR_UTIL - true when Scalar::Util can be loaded
#   _HAS_WEAKEN      - true when Scalar::Util::weaken is defined
# $@ is localised so the eval below does not clobber the caller's value.
BEGIN {
local $@;
# determine whether we need to take care of threads
use Config ();
my $usethreads = $Config::Config{usethreads}; # && exists $INC{"threads.pm"}
*_HAS_THREADS = $usethreads ? sub () { 1 } : sub () { 0 };
*_HAS_SCALAR_UTIL = eval { require Scalar::Util; 1 } ? sub () { 1 } : sub () {
0 };
*_HAS_WEAKEN = defined(&Scalar::Util::weaken) ? sub () { 1 } : sub () { 0 };
}
# Compile-time setup of refaddr(): import it from Scalar::Util when that
# module was found, otherwise install a fallback that extracts the hex
# address from overload::StrVal's output and dies if it cannot be parsed.
BEGIN {
# create a refaddr function
local $@;
if ( _HAS_SCALAR_UTIL ) {
Scalar::Util->import("refaddr");
} else {
require overload;
*refaddr = sub {
# NOTE(review): the fallback returns the captured hex digits as a string,
# not a number; it is only used here as a hash key.
if ( overload::StrVal($_[0]) =~ /\( 0x ([a-zA-Z0-9]+) \)$/x) {
return $1;
} else {
die "couldn't parse StrVal: " . overload::StrVal($_[0]);
}
};
}
}
# Used by the CLONE method to rehash the keys after their refaddr changed:
# @thread_object_registry holds the tied objects created so far and $count
# tracks how many have been added since the registry was last cleaned.
my (@thread_object_registry, $count);
# Constructor: tie %hash, CLASS, key => value, ...
# Creates the object, stores the initial key/value pairs through STORE and,
# on threaded perls, records the object so CLONE can re-key it later.
sub TIEHASH {
    my $class = shift;
    my $self  = bless [], $class;

    while (@_) {
        my $key   = shift;
        my $value = shift;
        $self->STORE($key, $value);
    }

    return $self unless _HAS_THREADS;

    unless (_HAS_WEAKEN) {
        $count++;    # used in the warning
        return $self;
    }

    # remember the object so that we can rekey it on CLONE, but keep only
    # a weak reference so that there are no leaks
    push @thread_object_registry, $self;
    Scalar::Util::weaken( $thread_object_registry[-1] );

    if ( ++$count > 1000 ) {
        # this ensures we don't fill up with a huge array of dead weakrefs
        @thread_object_registry = grep { defined } @thread_object_registry;
        $count = 0;
    }

    return $self;
}
# Tag written into frozen data so STORABLE_thaw can reject other formats.
my $storable_format_version = __PACKAGE__ . "/" . "0.01";

# Storable freeze hook. Returns the format tag followed by two references:
# the list of [key, value] entries for reference keys, and the hash holding
# the plain-string keys.
sub STORABLE_freeze {
    my ( $self, $is_cloning ) = @_;
    my ( $by_ref, $by_string ) = @$self;
    my @entries = values %$by_ref;
    return ( $storable_format_version, \@entries, $by_string );
}
# Storable thaw hook. Checks the format tag, resets the object to an empty
# reference-key table plus the thawed string-key hash, then re-keys the
# thawed [key, value] entries through _reindex_keys.
sub STORABLE_thaw {
    my ( $self, $is_cloning, $version, $refs, $reg ) = @_;

    if ( $version ne $storable_format_version ) {
        croak "incompatible versions of Tie::RefHash between freeze and thaw";
    }

    @$self = ( {}, $reg );
    return $self->_reindex_keys( $refs );
}
# Thread clone hook. Warns when objects were created but weaken is not
# available, then re-keys every live registered object and drops dead
# registry entries.
sub CLONE {
    my $pkg = shift;

    if ( $count and not _HAS_WEAKEN ) {
        warn "Tie::RefHash is not threadsafe without Scalar::Util::weaken";
    }

    # when the thread has been cloned all the objects need to be updated.
    # dead weakrefs are undefined, so we filter them out
    @thread_object_registry =
        grep { defined && do { $_->_reindex_keys; 1 } } @thread_object_registry;

    $count = 0;    # we just cleaned up
}
# Rebuild the reference-key table (slot 0) so that each [key, value] entry
# is stored under the current refaddr of its key. Entries passed in
# $extra_keys (an array reference, optional) are added as well.
sub _reindex_keys {
my ( $self, $extra_keys ) = @_;
# rehash all the ref keys based on their new StrVal
%{ $self->[0] } = map { refaddr($_->[0]) => $_ } (values(%{ $self->[0] }), @{
$extra_keys || [] });
}
# Look up a key. Reference keys are found through their refaddr in slot 0,
# where each entry is [key, value]; plain keys live in the hash in slot 1.
# Returns undef for a reference key that is not present.
sub FETCH {
    my ($self, $key) = @_;
    return $self->[1]{$key} unless ref $key;

    my $addr = refaddr($key);
    return defined $self->[0]{$addr} ? $self->[0]{$addr}[1] : undef;
}
# Store a value. A reference key is saved as [key, value] under its refaddr
# in slot 0; any other key goes straight into the hash in slot 1.
# Returns the stored value.
sub STORE {
    my ($self, $key, $value) = @_;

    unless (ref $key) {
        $self->[1]{$key} = $value;
        return $value;
    }

    $self->[0]{ refaddr($key) } = [ $key, $value ];
    return $value;
}
# Remove a key and return the value it had (undef when it was absent).
sub DELETE {
    my ($self, $key) = @_;

    if (ref $key) {
        my $entry = delete $self->[0]{ refaddr($key) };
        return $entry ? $entry->[1] : undef;
    }

    return delete $self->[1]{$key};
}
# True when the key is present: reference keys are checked by refaddr in
# slot 0, all other keys in the hash in slot 1.
sub EXISTS {
    my ($self, $key) = @_;
    return exists $self->[0]{ refaddr($key) } if ref $key;
    return exists $self->[1]{$key};
}
# Start a new iteration: reset the iterators of both internal hashes, clear
# the phase flag in slot 2 and return the first key via NEXTKEY.
sub FIRSTKEY {
    my ($self) = @_;
    keys %{ $self->[$_] } for 0, 1;    # reset iterators
    $self->[2] = 0;                    # flag for iteration, see NEXTKEY
    return $self->NEXTKEY;
}
# Return the next key of the iteration. While the phase flag in slot 2 is
# false the reference keys in slot 0 are walked and the original reference
# is returned; once they are exhausted the flag is set and iteration
# continues over the plain keys in slot 1.
sub NEXTKEY {
    my $self = shift;

    unless ($self->[2]) {
        my @pair = each %{ $self->[0] };
        return $pair[1][0] if @pair;
        $self->[2] = 1;
    }

    return each %{ $self->[1] };
}
# Empty both internal hashes and reset the iteration phase flag.
sub CLEAR {
    my ($self) = @_;
    %{ $self->[0] } = ();
    $self->[2] = 0;
    %{ $self->[1] } = ();
}
package Tie::RefHash::Nestable;
use vars '@ISA';
@ISA = 'Tie::RefHash';
# Store a value; when the value is a plain, not yet tied hash reference,
# first tie that hash to the same class as this object (keeping its
# contents), then store it via the parent class's STORE.
sub STORE {
    my ($self, $key, $value) = @_;

    my $needs_tie = ref($value) eq 'HASH' && !tied %$value;
    if ($needs_tie) {
        my @contents = %$value;
        tie %$value, ref($self), @contents;
    }

    return $self->SUPER::STORE($key, $value);
}
1;
package Tie::StdHandle;
use strict;
use Tie::Handle;
use vars qw(@ISA $VERSION);
@ISA = 'Tie::Handle';
$VERSION = '4.2';
=head1 NAME
Tie::StdHandle - base class definitions for tied handles
=head1 SYNOPSIS
package NewHandle;
require Tie::Handle;
@ISA = qw(Tie::Handle);
sub READ { ... } # Provide a needed method
sub TIEHANDLE { ... } # Overrides inherited method
package main;
tie *FH, 'NewHandle';
=head1 DESCRIPTION
The B<Tie::StdHandle> package provides most methods for file handles described
in L<perltie> (the exceptions are C<UNTIE> and C<DESTROY>). It causes tied
file handles to behave exactly like standard file handles and allows for
selective overwriting of methods.
=cut
# Constructor: creates a fresh anonymous glob, blesses a reference to it into
# the requested class and, when further arguments are given, opens it by
# passing them to the object's OPEN method. Returns the object.
sub TIEHANDLE {
    my $class = shift;
    my $fh = bless \do { local *HANDLE }, $class;
    $fh->OPEN(@_) if @_;
    return $fh;
}
# Thin wrappers that apply the corresponding built-in to the underlying
# handle (the object itself) and return the built-in's result.
sub EOF {
    my ($fh) = @_;
    return eof($fh);
}
sub TELL {
    my ($fh) = @_;
    return tell($fh);
}
sub FILENO {
    my ($fh) = @_;
    return fileno($fh);
}
sub SEEK {
    my ($fh, $position, $whence) = @_;
    return seek($fh, $position, $whence);
}
sub CLOSE {
    my ($fh) = @_;
    return close($fh);
}
# Apply binmode to the underlying handle. When a layer argument is supplied
# (binmode FH, LAYER) it is passed through instead of being dropped.
# Returns the result of the built-in binmode.
sub BINMODE {
    return @_ > 1 ? binmode($_[0], $_[1]) : binmode($_[0]);
}
# (Re-)open the underlying handle. If the object's FILENO method reports an
# open descriptor, the handle is closed through its CLOSE method first. With
# one extra argument the two-argument form of open is used, otherwise the
# three-argument form. Returns the result of open.
sub OPEN {
    my $already_open = defined $_[0]->FILENO;
    $_[0]->CLOSE if $already_open;

    return open($_[0], $_[1]) if @_ == 2;
    return open($_[0], $_[1], $_[2]);
}
# Read from the underlying handle into the caller's buffer ($_[1], modified
# in place) and return the result of the built-in read.
# Arguments after the object: buffer, length and an optional offset into the
# buffer. The offset is passed through when given instead of being ignored.
sub READ {
    return read($_[0], $_[1], $_[2], $_[3]) if defined $_[3];
    return read($_[0], $_[1], $_[2]);
}
# Read from the underlying handle: one line in scalar context, all remaining
# lines in list context.
sub READLINE {
    my ($fh) = @_;
    return readline($fh);
}
# Return the next character from the underlying handle.
sub GETC {
    my ($fh) = @_;
    return getc($fh);
}
# Write part of a scalar to the underlying handle.
# Arguments after the object: the scalar, the number of characters to write
# (optional; defaults to everything from the offset on) and the offset into
# the scalar (optional; defaults to 0).
# $\ is localised so that no output record separator is appended; this
# package's PRINT has already added it to the data it passes in.
# Returns the result of print.
sub WRITE
{
    my $fh     = $_[0];
    my $offset = defined $_[3] ? $_[3] : 0;
    my $chunk  = defined $_[2]
               ? substr($_[1], $offset, $_[2])
               : substr($_[1], $offset);
    local $\;    # don't print any line terminator
    print $fh $chunk;
}
1;
package Tie::Hash::NamedCapture;
our $VERSION = "0.06";
# The real meat is implemented in XS in universal.c in the core, but this
# method was left behind because gv.c expects a Pure-Perl method in
# this package when it loads the tie magic for %+ and %-
# The two mode values handed out by flags(), which is not defined in this
# file: $one is used for ordinary ties, $all when the "all" option is true.
my ($one, $all) = Tie::Hash::NamedCapture::flags();

# Constructor: tie %hash, CLASS, all => BOOL
# Returns a blessed reference to a scalar holding the selected mode value.
sub TIEHASH {
    my $class   = shift;
    my %options = @_;

    my $mode = $one;
    $mode = $all if $options{all};

    return bless \$mode, $class;
}
# Tie the built-in %+ and %- hashes to this package; %- is tied with the
# "all" option set.
tie %+, __PACKAGE__;
tie %-, __PACKAGE__, all => 1;
1;
__END__
=head1 NAME
Tie::Hash::NamedCapture - Named regexp capture buffers
=head1 SYNOPSIS
tie my %hash, "Tie::Hash::NamedCapture";
# %hash now behaves like %+
tie my %hash, "Tie::Hash::NamedCapture", all => 1;
# %hash now accesses buffers from regexp in $qr like %-
=head1 DESCRIPTION
This module is used to implement the special hashes C<%+> and C<%->, but it
can be used to tie other variables as you choose.
When the C<all> parameter is provided, then the tied hash elements will be
array refs listing the contents of each capture buffer whose name is the
same as the associated hash key. If none of these buffers were involved in
the match, the contents of that array ref will be as many C<undef> values
as there are capture buffers with that name. In other words, the tied hash
will behave as C<%->.
When the C<all> parameter is omitted or false, then the tied hash elements
will be the contents of the leftmost defined buffer with the name of the
associated hash key. In other words, the tied hash will behave as
C<%+>.
The keys of C<%->-like hashes correspond to all buffer names found in the
regular expression; the keys of C<%+>-like hashes list only the names of
buffers that have captured (and that are thus associated to defined values).
=head1 SEE ALSO
L<perlreapi>, L<re>, L<perlmodlib/Pragmatic Modules>, L<perlvar/"%+">,
L<perlvar/"%-">.
=cut
package Tie::SubstrHash;
our $VERSION = '1.00';
=head1 NAME
Tie::SubstrHash - Fixed-table-size, fixed-key-length hashing
=head1 SYNOPSIS
require Tie::SubstrHash;
tie %myhash, 'Tie::SubstrHash', $key_len, $value_len, $table_size;
=head1 DESCRIPTION
The B<Tie::SubstrHash> package provides a hash-table-like interface to
an array of determinate size, with constant key size and record size.
Upon tying a new hash to this package, the developer must specify the
size of the keys that will be used, the size of the value fields that the
keys will index, and the size of the overall table (in terms of key-value
pairs, not size in hard memory). I<These values will not change for the
duration of the tied hash>. The newly-allocated hash table may now have
data stored and retrieved. Efforts to store more than C<$table_size>
elements will result in a fatal error, as will efforts to store a value
not exactly C<$value_len> characters in length, or reference through a
key not exactly C<$key_len> characters in length. While these constraints
may seem excessive, the result is a hash table using much less internal
memory than an equivalent freely-allocated hash table.
=head1 CAVEATS
Because the current implementation uses the table and key sizes for the
hashing algorithm, there is no means by which to dynamically change the
value of any of the initialization parameters.
The hash does not support exists().
=cut
use Carp;
# Constructor: tie %h, CLASS, $key_len, $value_len, $table_size
# Builds the object as an array:
#   0 table string (one record per slot, initially all "\0")
#   1 key length, 2 value length
#   3 [requested table size, number of slots (a prime from findgteprime)]
#   4 record length (1 flag byte + key + value)
#   5 number of stored entries, 6 iterator position
# The object is blessed into the class that was asked for, so subclasses get
# instances of their own class.
sub TIEHASH {
    my $pack = shift;
    my ($klen, $vlen, $tsize) = @_;
    my $rlen = 1 + $klen + $vlen;
    $tsize = [$tsize,
              findgteprime($tsize * 1.1)]; # Allow 10% empty.
    my $self = bless ["\0", $klen, $vlen, $tsize, $rlen, 0, -1], $pack;
    $$self[0] x= $rlen * $tsize->[1];
    return $self;
}
# Reset the table: fill the table string with NUL bytes (record length times
# number of slots), zero the entry count and rewind the iterator.
sub CLEAR {
    my ($self) = @_;
    my $table_bytes = $$self[4] * $$self[3]->[1];
    $$self[0] = "\0" x $table_bytes;
    $$self[5] = 0;
    $$self[6] = -1;
}
# Look up a key and return its value, or undef when the key is absent.
# $self, $key and the size fields are set with local() because &hashkey and
# &rehash (called with the & form) read and update package variables such as
# $key, $klen, $tsize, $hash and $hashbase.
sub FETCH {
local($self,$key) = @_;
local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
&hashkey;
for (;;) {
$offset = $hash * $rlen;
$record = substr($$self[0], $offset, $rlen);
# The first byte of a record is its state flag.
if (ord($record) == 0) {
# flag 0: slot never used, so the key is not in the table
return undef;
}
elsif (ord($record) == 1) {
# flag 1: slot marked by DELETE; keep probing
}
elsif (substr($record, 1, $klen) eq $key) {
return substr($record, 1+$klen, $vlen);
}
# move on to the next slot in the probe sequence
&rehash;
}
}
# Store a value under a key. Croaks when the entry count exceeds the
# requested table size or when the value does not have exactly $vlen
# characters (the key length is checked in &hashkey).
# Variables are set with local() because &hashkey and &rehash work on
# package variables.
sub STORE {
local($self,$key,$val) = @_;
local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
croak("Table is full ($tsize->[0] elements)") if $$self[5] > $tsize->[0];
croak(qq/Value "$val" is not $vlen characters long/)
if length($val) != $vlen;
# offset of the first DELETE-marked slot seen while probing, if any
my $writeoffset;
&hashkey;
for (;;) {
$offset = $hash * $rlen;
$record = substr($$self[0], $offset, $rlen);
if (ord($record) == 0) {
# never-used slot: the key is new; write it into the first reusable slot
# found earlier, or here, and count the new entry
$record = "\2". $key . $val;
die "panic" unless length($record) == $rlen;
$writeoffset = $offset unless defined $writeoffset;
substr($$self[0], $writeoffset, $rlen) = $record;
++$$self[5];
return;
}
elsif (ord($record) == 1) {
# DELETE-marked slot: remember the first one for reuse and keep probing
$writeoffset = $offset unless defined $writeoffset;
}
elsif (substr($record, 1, $klen) eq $key) {
# key already present: overwrite the record in place
$record = "\2". $key . $val;
die "panic" unless length($record) == $rlen;
substr($$self[0], $offset, $rlen) = $record;
return;
}
&rehash;
}
}
# Remove a key and return the value it had, or undef when it was absent.
# The slot is marked with flag byte "\1" so later probes continue past it,
# and the entry count is decremented so the freed capacity can be reused.
# Variables are set with local() because &hashkey and &rehash work on
# package variables.
sub DELETE {
    local($self,$key) = @_;
    local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
    &hashkey;
    for (;;) {
        $offset = $hash * $rlen;
        $record = substr($$self[0], $offset, $rlen);
        if (ord($record) == 0) {
            # never-used slot: the key is not in the table
            return undef;
        }
        elsif (ord($record) == 1) {
            # previously deleted slot: keep probing
        }
        elsif (substr($record, 1, $klen) eq $key) {
            substr($$self[0], $offset, 1) = "\1";
            --$$self[5];    # must happen before returning
            return substr($record, 1+$klen, $vlen);
        }
        &rehash;
    }
}
# Start a new iteration: rewind the iterator position and return the first
# key as found by NEXTKEY.
sub FIRSTKEY {
    my ($self) = @_;
    $$self[6] = -1;
    return NEXTKEY(@_);
}
# Return the key of the next occupied slot after the saved iterator position
# and remember that slot; when no occupied slot remains, rewind the iterator
# and return undef. A slot is occupied when its flag byte is "\2".
sub NEXTKEY {
    my ($self) = @_;
    my $klen  = $$self[1];
    my $rlen  = $$self[4];
    my $slots = $$self[3]->[1];

    my $slot = $$self[6] + 1;
    while ($slot < $slots) {
        if (substr($$self[0], $slot * $rlen, 1) eq "\2") {
            $$self[6] = $slot;
            return substr($$self[0], $slot * $rlen + 1, $klen);
        }
        ++$slot;
    }

    $$self[6] = -1;
    return undef;
}
# exists() is not supported by this class: always croaks.
sub EXISTS {
croak "Tie::SubstrHash does not support exists()";
}
# Compute the initial probe slot for $key.
# Works entirely on package variables set by the caller ($key, $klen,
# $tsize) and leaves its results in $hash (current slot) and $hashbase (step
# used by rehash). Croaks when the key does not have exactly $klen
# characters.
sub hashkey {
croak(qq/Key "$key" is not $klen characters long/)
if length($key) != $klen;
$hash = 2;
for (unpack('C*', $key)) {
$hash = $hash * 33 + $_;
# reduce modulo the slot count once the running value reaches 1e13
&_hashwrap if $hash >= 1e13;
}
&_hashwrap if $hash >= $tsize->[1];
# a zero step would make rehash loop on one slot, so use 1 instead
$hash = 1 unless $hash;
$hashbase = $hash;
}
# Reduce the package variable $hash modulo the number of slots
# ($tsize->[1]) by subtracting the largest whole multiple.
sub _hashwrap {
$hash -= int($hash / $tsize->[1]) * $tsize->[1];
}
# Advance the package variable $hash to the next probe slot: add the step
# $hashbase and wrap around at the number of slots ($tsize->[1]).
sub rehash {
$hash += $hashbase;
$hash -= $tsize->[1] if $hash >= $tsize->[1];
}
# using POSIX::ceil() would be too heavy, and not all platforms have it.
# Returns the argument unchanged when it is already integral, otherwise the
# integer part of (argument + 1).
sub ceil {
    my ($num) = @_;
    return $num if $num == int $num;
    return int($num + 1);
}
# See:
#
# http://www-groups.dcs.st-andrews.ac.uk/~history/HistTopics/Prime_numbers.html
#
# Returns 2 for arguments up to 2. Otherwise rounds the argument up with
# ceil(), moves to the next odd number if it is even, and tests successive
# odd candidates by trial division with odd divisors up to the square root.
sub findgteprime { # find the smallest prime integer greater than or equal to
use integer;
my $num = ceil(shift);
return 2 if $num <= 2;
$num++ unless $num % 2;
my $i;
# $sqrtnum is raised in the loop whenever its square falls below $num
my $sqrtnum = int sqrt $num;
my $sqrtnumsquared = $sqrtnum * $sqrtnum;
NUM:
for (;; $num += 2) {
if ($sqrtnumsquared < $num) {
$sqrtnum++;
$sqrtnumsquared = $sqrtnum * $sqrtnum;
}
for ($i = 3; $i <= $sqrtnum; $i += 2) {
# a divisor was found: try the next odd candidate
next NUM unless $num % $i;
}
return $num;
}
}
1;
package Time::Local;
require Exporter;
use Carp;
use Config;
use strict;
use integer;
use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK );
$VERSION = '1.1901';
@ISA = qw( Exporter );
@EXPORT = qw( timegm timelocal );
@EXPORT_OK = qw( timegm_nocheck timelocal_nocheck );
# Days per month for a non-leap year, indexed by month number 0..11.
my @MonthDays = ( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
# Determine breakpoint for rolling century
# $ThisYear is element 5 of localtime(). timegm() maps two-digit years above
# $Breakpoint to $Century and the others to $NextCentury.
my $ThisYear = ( localtime() )[5];
my $Breakpoint = ( $ThisYear + 50 ) % 100;
my $NextCentury = $ThisYear - $ThisYear % 100;
$NextCentury += 100 if $Breakpoint < 50;
my $Century = $NextCentury - 100;
# Seconds added to every result; changed only in the MacOS branch below.
my $SecOff = 0;
# %Options holds runtime switches (no_range_check); %Cheat is the cache
# used by _daygm.
my ( %Options, %Cheat );
use constant SECS_PER_MINUTE => 60;
use constant SECS_PER_HOUR => 3600;
use constant SECS_PER_DAY => 86400;
# Largest integer value assumed for a time, derived from the size of a Perl
# integer ($Config{ivsize} bytes), and from it the largest day number
# accepted by timegm().
my $MaxInt;
if ( $^O eq 'MacOS' ) {
# time_t is unsigned...
$MaxInt = ( 1 << ( 8 * $Config{ivsize} ) ) - 1;
}
else {
# built in two steps instead of shifting by the full width minus one
# NOTE(review): presumably to avoid overflow under "use integer" - confirm
$MaxInt = ( ( 1 << ( 8 * $Config{ivsize} - 2 ) ) - 1 ) * 2 + 1;
}
my $MaxDay = int( ( $MaxInt - ( SECS_PER_DAY / 2 ) ) / SECS_PER_DAY ) - 1;
# Determine the EPOC day for this machine
# $Epoc is the day number of the system epoch as computed by _daygm();
# _daygm() subtracts it so that its results are relative to the epoch.
my $Epoc = 0;
if ( $^O eq 'vos' ) {
# work around posix-977 -- VOS doesn't handle dates in the range
# 1970-1980.
$Epoc = _daygm( 0, 0, 0, 1, 0, 70, 4, 0 );
}
elsif ( $^O eq 'MacOS' ) {
$MaxDay *=2 if $^O eq 'MacOS'; # time_t unsigned ... quick hack?
# MacOS time() is seconds since 1 Jan 1904, localtime
# so we need to calculate an offset to apply later
$Epoc = 693901;
$SecOff = timelocal( localtime(0)) - timelocal( gmtime(0) ) ;
$Epoc += _daygm( gmtime(0) );
}
else {
$Epoc = _daygm( gmtime(0) );
}
# The _daygm() calls above cached values computed with the old $Epoc.
%Cheat = (); # clear the cache as epoc has changed
# Return the day number, relative to $Epoc, for a date given in the argument
# layout of gmtime(): only $_[3] (day of month), $_[4] (month) and $_[5]
# (year minus 1900) are used.
# The per-month part is cached in %Cheat under a key packed from month and
# year, so it is computed once per month. Because the cache is filled with
# ||=, a computed value of 0 is recomputed on every call.
sub _daygm {
# This is written in such a byzantine way in order to avoid
# lexical variables and sub calls, for speed
return $_[3] + (
$Cheat{ pack( 'ss', @_[ 4, 5 ] ) } ||= do {
# shift the month so that March becomes 0 and January/February become 10/11
my $month = ( $_[4] + 10 ) % 12;
# subtracts 1 from the year for the shifted months 10 and 11
# NOTE(review): relies on integer division from "use integer" - confirm
my $year = ( $_[5] + 1900 ) - ( $month / 10 );
( ( 365 * $year )
+ ( $year / 4 )
- ( $year / 100 )
+ ( $year / 400 )
+ ( ( ( $month * 306 ) + 5 ) / 10 )
)
- $Epoc;
}
);
}
# Unchecked conversion of a gmtime()-style argument list to seconds:
# $SecOff plus seconds, minutes and hours from $_[0..2], plus the day number
# from _daygm. &_daygm is called without parentheses so that it receives
# this sub's @_ directly.
sub _timegm {
my $sec =
$SecOff + $_[0] + ( SECS_PER_MINUTE * $_[1] ) + ( SECS_PER_HOUR * $_[2]
);
return $sec + ( SECS_PER_DAY * &_daygm );
}
# Convert ($sec, $min, $hour, $mday, $month, $year), taken as GMT, to
# seconds since the epoch.
# Years >= 1000 are treated as actual years, years 0..99 are placed in the
# rolling century, and anything else is used as an offset from 1900.
# Unless $Options{no_range_check} is set, croaks when a field is out of
# range or when the resulting day number is not below $MaxDay in magnitude.
sub timegm {
my ( $sec, $min, $hour, $mday, $month, $year ) = @_;
if ( $year >= 1000 ) {
$year -= 1900;
}
elsif ( $year < 100 and $year >= 0 ) {
$year += ( $year > $Breakpoint ) ? $Century : $NextCentury;
}
unless ( $Options{no_range_check} ) {
croak "Month '$month' out of range 0..11"
if $month > 11
or $month < 0;
my $md = $MonthDays[$month];
# February has one more day in leap years
++$md
if $month == 1 && _is_leap_year( $year + 1900 );
croak "Day '$mday' out of range 1..$md" if $mday > $md or $mday < 1;
croak "Hour '$hour' out of range 0..23" if $hour > 23 or $hour < 0;
croak "Minute '$min' out of range 0..59" if $min > 59 or $min < 0;
croak "Second '$sec' out of range 0..59" if $sec > 59 or $sec < 0;
}
# _daygm only looks at elements 3..5, so the first three are placeholders
my $days = _daygm( undef, undef, undef, $mday, $month, $year );
unless ($Options{no_range_check} or abs($days) < $MaxDay) {
my $msg = '';
$msg .= "Day too big - $days > $MaxDay\n" if $days > $MaxDay;
# report the actual year in the message
$year += 1900;
$msg .= "Cannot handle date ($sec, $min, $hour, $mday, $month, $year)";
croak $msg;
}
return $sec
+ $SecOff
+ ( SECS_PER_MINUTE * $min )
+ ( SECS_PER_HOUR * $hour )
+ ( SECS_PER_DAY * $days );
}
# Return 1 when the given year is a leap year (divisible by 4, and either
# not divisible by 100 or divisible by 400), 0 otherwise.
sub _is_leap_year {
    my ($year) = @_;
    my $is_leap = $year % 4 == 0
               && ( $year % 100 != 0 || $year % 400 == 0 );
    return $is_leap ? 1 : 0;
}
# Same as timegm() but with range checking switched off for the duration of
# the call.
sub timegm_nocheck {
    local $Options{no_range_check} = 1;
    return timegm(@_);
}
# Convert ($sec, $min, $hour, $mday, $month, $year), taken as local time, to
# seconds since the epoch.
# The date is first converted as if it were GMT (timegm is called with this
# sub's own @_, so its range checks apply), then corrected by the local
# zone offset and by any DST change near that time.
sub timelocal {
    my $ref_t = &timegm;
    my $loc_for_ref_t = _timegm( localtime($ref_t) );

    # No offset between local time and GMT: nothing to adjust.
    my $zone_off = $loc_for_ref_t - $ref_t
        or return $loc_for_ref_t;

    # Adjust for timezone
    my $loc_t = $ref_t - $zone_off;

    # Are we close to a DST change or are we done
    my $dst_off = $ref_t - _timegm( localtime($loc_t) );

    # If this evaluates to true, it means that the value in $loc_t is
    # the _second_ hour after a DST change where the local time moves
    # backward.
    if ( ! $dst_off &&
         ( ( $ref_t - SECS_PER_HOUR )
           - _timegm( localtime( $loc_t - SECS_PER_HOUR ) ) < 0 )
       ) {
        return $loc_t - SECS_PER_HOUR;
    }

    # Adjust for DST change
    $loc_t += $dst_off;

    return $loc_t if $dst_off > 0;

    # If the original date was a non-existent gap in a forward DST jump,
    # we should now have the wrong answer - undo the DST adjustment
    my ( $s, $m, $h ) = localtime($loc_t);
    $loc_t -= $dst_off if $s != $_[0] || $m != $_[1] || $h != $_[2];

    return $loc_t;
}
# Same as timelocal() but with range checking switched off for the duration
# of the call.
sub timelocal_nocheck {
    local $Options{no_range_check} = 1;
    return timelocal(@_);
}
1;
__END__
=head1 NAME
Time::Local - efficiently compute time from local and GMT time
=head1 SYNOPSIS
$time = timelocal($sec,$min,$hour,$mday,$mon,$year);
$time = timegm($sec,$min,$hour,$mday,$mon,$year);
=head1 DESCRIPTION
This module provides functions that are the inverse of built-in perl
functions C<localtime()> and C<gmtime()>. They accept a date as a
six-element array, and return the corresponding C<time(2)> value in
seconds since the system epoch (Midnight, January 1, 1970 GMT on Unix,
for example). This value can be positive or negative, though POSIX
only requires support for positive values, so dates before the
system's epoch may not work on all operating systems.
It is worth drawing particular attention to the expected ranges for
the values provided. The value for the day of the month is the actual
day (ie 1..31), while the month is the number of months since January
(0..11). This is consistent with the values returned from
C<localtime()> and C<gmtime()>.
=head1 FUNCTIONS
=head2 C<timelocal()> and C<timegm()>
This module exports two functions by default, C<timelocal()> and
C<timegm()>.
The C<timelocal()> and C<timegm()> functions perform range checking on
the input $sec, $min, $hour, $mday, and $mon values by default.
=head2 C<timelocal_nocheck()> and C<timegm_nocheck()>
If you are working with data you know to be valid, you can speed your
code up by using the "nocheck" variants, C<timelocal_nocheck()> and
C<timegm_nocheck()>. These variants must be explicitly imported.
use Time::Local 'timelocal_nocheck';
# The 365th day of 1999
print scalar localtime timelocal_nocheck 0,0,0,365,0,99;
If you supply data which is not valid (month 27, second 1,000) the
results will be unpredictable (so don't do that).
=head2 Year Value Interpretation
Strictly speaking, the year should be specified in a form consistent
with C<localtime()>, i.e. the offset from 1900. In order to make the
interpretation of the year easier for humans, however, who are more
accustomed to seeing years as two-digit or four-digit values, the
following conventions are followed:
=over 4
=item *
Years greater than 999 are interpreted as being the actual year,
rather than the offset from 1900. Thus, 1964 would indicate the year
Martin Luther King won the Nobel prize, not the year 3864.
=item *
Years in the range 100..999 are interpreted as offset from 1900, so
that 112 indicates 2012. This rule also applies to years less than
zero (but see note below regarding date range).
=item *
Years in the range 0..99 are interpreted as shorthand for years in the
rolling "current century," defined as 50 years on either side of the
current year. Thus, today, in 1999, 0 would refer to 2000, and 45 to
2045, but 55 would refer to 1955. Twenty years from now, 55 would
instead refer to 2055. This is messy, but matches the way people
currently think about two digit dates. Whenever possible, use an
absolute four digit year instead.
=back
The scheme above allows interpretation of a wide range of dates,
particularly if 4-digit years are used.
=head2 Limits of time_t
The range of dates that can actually be handled depends on the size
of C<time_t> (usually a signed integer) on the given
platform. Currently, this is 32 bits for most systems, yielding an
approximate range from Dec 1901 to Jan 2038.
Both C<timelocal()> and C<timegm()> croak if given dates outside the
supported range.
=head2 Ambiguous Local Times (DST)
Because of DST changes, there are many time zones where the same local
time occurs for two different GMT times on the same day. For example,
in the "Europe/Paris" time zone, the local time of 2001-10-28 02:30:00
can represent either 2001-10-28 00:30:00 GMT, B<or> 2001-10-28
01:30:00 GMT.
When given an ambiguous local time, the timelocal() function should
always return the epoch for the I<earlier> of the two possible GMT
times.
=head2 Non-Existent Local Times (DST)
When a DST change causes a locale clock to skip one hour forward,
there will be an hour's worth of local times that don't exist. Again,
for the "Europe/Paris" time zone, the local clock jumped from
2001-03-25 01:59:59 to 2001-03-25 03:00:00.
If the C<timelocal()> function is given a non-existent local time, it
will simply return an epoch value for the time one hour later.
=head2 Negative Epoch Values
Negative epoch (C<time_t>) values are not officially supported by the
POSIX standards, so this module's tests do not test them. On some
systems, they are known not to work. These include MacOS (pre-OSX) and
Win32.
On systems which do support negative epoch values, this module should
be able to cope with dates before the start of the epoch, down to the
minimum value of time_t for the system.
=head1 IMPLEMENTATION
These routines are quite efficient and yet are always guaranteed to
agree with C<localtime()> and C<gmtime()>. We manage this by caching
the start times of any months we've seen before. If we know the start
time of the month, we can always calculate any time within the month.
The start times are calculated using a mathematical formula, unlike
other algorithms that make multiple calls to C<gmtime()>.
The C<timelocal()> function is implemented using the same cache. We
just assume that we're translating a GMT time, and then fudge it when
we're done for the timezone and daylight savings arguments. Note that
the timezone is evaluated for each date because countries occasionally
change their official timezones. Assuming that C<localtime()> corrects
for these changes, this routine will also be correct.
=head1 BUGS
The whole scheme for interpreting two-digit years can be considered a
bug.
=head1 SUPPORT
Support for this module is provided via the datetime@perl.org email
list. See http://lists.perl.org/ for more details.
Please submit bugs to the CPAN RT system at
http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Time-Local or via email
at bug-time-local@rt.cpan.org.
=head1 COPYRIGHT
Copyright (c) 1997-2003 Graham Barr, 2003-2007 David Rolsky. All
rights reserved. This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the LICENSE file included
with this module.
=head1 AUTHOR
This module is based on a Perl 4 library, timelocal.pl, that was
included with Perl 4.036, and was most likely written by Tom
Christiansen.
The current version was written by Graham Barr.
It is now being maintained separately from the Perl core by Dave
Rolsky, <autarch@urth.org>.
=cut
package Time::gmtime;
use strict;
use 5.006_001;
use Time::tm;
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);
# Set up inheritance and the Exporter tables at compile time, so that
# @EXPORT_OK is already filled in when the "use vars @EXPORT_OK"
# statement following this block is processed.
BEGIN {
use Exporter ();
# Inherit from both Exporter and Time::tm.
@ISA = qw(Exporter Time::tm);
# Exported by default.
@EXPORT = qw(gmtime gmctime);
# Field variables, exported only on request.
@EXPORT_OK = qw(
$tm_sec $tm_min $tm_hour $tm_mday
$tm_mon $tm_year $tm_wday $tm_yday
$tm_isdst
);
# The :FIELDS tag exports the field variables plus the default functions.
%EXPORT_TAGS = ( FIELDS => [ @EXPORT_OK, @EXPORT ] );
$VERSION = 1.03;
}
use vars @EXPORT_OK;
# Build a Time::tm object from the nine-element list returned by gmtime().
#
# As a side effect the same values are stored in the package variables
# $tm_sec .. $tm_isdst. Returns the new object, or nothing when called
# with an empty list.
sub populate (@) {
    my @fields = @_;
    @fields or return;

    # Refresh the package-level field variables first ...
    (
        $tm_sec,  $tm_min,  $tm_hour, $tm_mday,
        $tm_mon,  $tm_year, $tm_wday, $tm_yday,
        $tm_isdst
    ) = @fields;

    # ... then copy them, in the same order, into the object's slots.
    my $tmob = Time::tm->new();
    @{$tmob} = (
        $tm_sec,  $tm_min,  $tm_hour, $tm_mday,
        $tm_mon,  $tm_year, $tm_wday, $tm_yday,
        $tm_isdst,
    );
    return $tmob;
}
# Object-returning counterpart of the built-in gmtime(): breaks down the
# given epoch time (the current time when no argument is passed) with
# CORE::gmtime and hands the resulting list to populate().
sub gmtime (;$) {
    my $when = @_ ? $_[0] : time;
    return populate( CORE::gmtime($when) );
}
# Return the string that CORE::gmtime produces in scalar context for the
# given epoch time (the current time when no argument is passed).
sub gmctime (;$) {
    my $when  = @_ ? $_[0] : time;
    my $stamp = CORE::gmtime($when);    # scalar assignment => string form
    return $stamp;
}
1;
__END__
=head1 NAME
Time::gmtime - by-name interface to Perl's built-in gmtime() function
=head1 SYNOPSIS
use Time::gmtime;
$gm = gmtime();
printf "The day in Greenwich is %s\n",
(qw(Sun Mon Tue Wed Thu Fri Sat Sun))[ $gm->wday() ];
use Time::gmtime qw(:FIELDS);
gmtime();
printf "The day in Greenwich is %s\n",
(qw(Sun Mon Tue Wed Thu Fri Sat Sun))[ $tm_wday ];
$now = gmctime();
use Time::gmtime;
use File::stat;
$date_string = gmctime(stat($file)->mtime);
=head1 DESCRIPTION
This module's default exports override the core gmtime() function,
replacing it with a version that returns "Time::tm" objects.
This object has methods that return the similarly named structure field
name from the C's tm structure from F<time.h>; namely sec, min, hour,
mday, mon, year, wday, yday, and isdst.
You may also import all the structure fields directly into your namespace
as regular variables using the :FIELDS import tag. (Note that this
still overrides your core functions.) Access these fields as variables
named with a preceding C<tm_> in front of their method names. Thus,
C<$tm_obj-E<gt>mday()> corresponds to $tm_mday if you import the fields.
The gmctime() function provides a way of getting at the
scalar sense of the original CORE::gmtime() function.
To access this functionality without the core overrides,
pass C<use> an empty import list, and then access
the functions by their fully qualified names.
On the other hand, the built-ins are still available
via the C<CORE::> pseudo-package.
=head1 NOTE
While this class is currently implemented using the Class::Struct
module to build a struct-like class, you shouldn't rely upon this.
=head1 AUTHOR
Tom Christiansen
package Time::localtime;
use strict;
use 5.006_001;
use Time::tm;
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);
# Set up inheritance and the Exporter tables at compile time, so that
# @EXPORT_OK is already filled in when the "use vars @EXPORT_OK"
# statement following this block is processed.
BEGIN {
use Exporter ();
# Inherit from both Exporter and Time::tm.
@ISA = qw(Exporter Time::tm);
# Exported by default.
@EXPORT = qw(localtime ctime);
# Field variables, exported only on request.
@EXPORT_OK = qw(
$tm_sec $tm_min $tm_hour $tm_mday
$tm_mon $tm_year $tm_wday $tm_yday
$tm_isdst
);
# The :FIELDS tag exports the field variables plus the default functions.
%EXPORT_TAGS = ( FIELDS => [ @EXPORT_OK, @EXPORT ] );
$VERSION = 1.02;
}
use vars @EXPORT_OK;
# Build a Time::tm object from the nine-element list returned by
# localtime().
#
# As a side effect the same values are stored in the package variables
# $tm_sec .. $tm_isdst. Returns the new object, or nothing when called
# with an empty list.
sub populate (@) {
    my @fields = @_;
    @fields or return;

    # Refresh the package-level field variables first ...
    (
        $tm_sec,  $tm_min,  $tm_hour, $tm_mday,
        $tm_mon,  $tm_year, $tm_wday, $tm_yday,
        $tm_isdst
    ) = @fields;

    # ... then copy them, in the same order, into the object's slots.
    my $tmob = Time::tm->new();
    @{$tmob} = (
        $tm_sec,  $tm_min,  $tm_hour, $tm_mday,
        $tm_mon,  $tm_year, $tm_wday, $tm_yday,
        $tm_isdst,
    );
    return $tmob;
}
# Object-returning counterpart of the built-in localtime(): breaks down the
# given epoch time (the current time when no argument is passed) with
# CORE::localtime and hands the resulting list to populate().
sub localtime (;$) {
    my $when = @_ ? $_[0] : time;
    return populate( CORE::localtime($when) );
}
# Return the string that CORE::localtime produces in scalar context for the
# given epoch time (the current time when no argument is passed).
sub ctime (;$) {
    my $when  = @_ ? $_[0] : time;
    my $stamp = CORE::localtime($when);    # scalar assignment => string form
    return $stamp;
}
1;
__END__
=head1 NAME
Time::localtime - by-name interface to Perl's built-in localtime() function
=head1 SYNOPSIS
use Time::localtime;
printf "Year is %d\n", localtime->year() + 1900;
$now = ctime();
use Time::localtime;
use File::stat;
$date_string = ctime(stat($file)->mtime);
=head1 DESCRIPTION
This module's default exports override the core localtime() function,
replacing it with a version that returns "Time::tm" objects.
This object has methods that return the similarly named structure field
name from the C's tm structure from F<time.h>; namely sec, min, hour,
mday, mon, year, wday, yday, and isdst.
You may also import all the structure fields directly into your namespace
as regular variables using the :FIELDS import tag. (Note that this still
overrides your core functions.) Access these fields as
variables named with a preceding C<tm_> in front of their method names.
Thus, C<$tm_obj-E<gt>mday()> corresponds to $tm_mday if you import
the fields.
The ctime() function provides a way of getting at the
scalar sense of the original CORE::localtime() function.
To access this functionality without the core overrides,
pass C<use> an empty import list, and then access
the functions by their fully qualified names.
On the other hand, the built-ins are still available
via the C<CORE::> pseudo-package.
=head1 NOTE
While this class is currently implemented using the Class::Struct
module to build a struct-like class, you shouldn't rely upon this.
=head1 AUTHOR
Tom Christiansen
package Time::tm;
use strict;
our $VERSION = '1.00';
use Class::Struct qw(struct);
# Declare the Time::tm class through Class::Struct, with one scalar ('$')
# member for each field name listed below. The member list is passed as an
# array reference.
struct('Time::tm' => [
map { $_ => '$' } qw{ sec min hour mday mon year wday yday isdst }
]);
1;
__END__
=head1 NAME
Time::tm - internal object used by Time::gmtime and Time::localtime
=head1 SYNOPSIS
Don't use this module directly.
=head1 DESCRIPTION
This module is used internally as a base class by the Time::localtime and
Time::gmtime modules. It creates a Time::tm struct object which is
addressable just like C's tm structure from F<time.h>; namely with sec,
min, hour, mday, mon, year, wday, yday, and isdst.
This class is an internal interface only.
=head1 AUTHOR
Tom Christiansen
package Unicode::Collate;
# Compile-time sanity check: pack('U', 0x41) must produce the string "A".
# If it does not, loading the module is aborted, because pack_U() and
# unpack_U() below rely on the 'U' template to convert between code points
# and strings.
BEGIN {
unless ("A" eq pack('U', 0x41)) {
die "Unicode::Collate cannot stringify a Unicode code point\n";
}
}
use 5.006;
use strict;
use warnings;
use Carp;
use File::Spec;
no warnings 'utf8';
our $VERSION = '0.52';
# Package name, interpolated into error messages.
our $PACKAGE = __PACKAGE__;
# Directory components appended to each @INC entry when read_table()
# searches for a table file.
my @Path = qw(Unicode Collate);
# Table file used by new() when no 'table' parameter is given.
my $KeyFile = "allkeys.txt";
# Perl's boolean
use constant TRUE => 1;
use constant FALSE => "";
# Returned by index() in scalar context when nothing matches.
use constant NOMATCHPOS => -1;
# A coderef to get combining class imported from Unicode::Normalize
# (i.e. \&Unicode::Normalize::getCombinClass).
# This is also used as a HAS_UNICODE_NORMALIZE flag.
my $CVgetCombinClass;
# Supported Levels
use constant MinLevel => 1;
use constant MaxLevel => 4;
# Minimum weights at level 2 and 3, respectively
use constant Min2Wt => 0x20;
use constant Min3Wt => 0x02;
# Shifted weight at 4th level
use constant Shift4Wt => 0xFFFF;
# pack() template of a variable collation element (VCE): one unsigned char
# holding the "variable" boolean, followed by four 16-bit weights, one per
# level.
# PROBLEM: The Default Unicode Collation Element Table
# has weights over 0xFFFF at the 4th level.
# The tie-breaking in the variable weights
# other than "shift" (as well as "shift-trimmed") is unreliable.
use constant VCE_TEMPLATE => 'Cn4';
# A sort key: 16-bit weights
# See also the PROBLEM on VCE_TEMPLATE above.
use constant KEY_TEMPLATE => 'n*';
# Level separator in a sort key:
# i.e. pack(KEY_TEMPLATE, 0)
use constant LEVEL_SEP => "\0\0";
# Unicode code point separator for hash keys.
# A joined code point string (denoted by JCPS below)
# like "65;768" is used for internal processing
# instead of Perl's Unicode string like "\x41\x{300}",
# as the native code point is different from the Unicode code point
# on EBCDIC platforms.
# This character must not be included in any stringified
# representation of an integer.
use constant CODE_SEP => ';';
# boolean values of variable weights
use constant NON_VAR => 0; # Non-Variable character
use constant VAR => 1; # Variable character
# specific code points
use constant Hangul_LBase => 0x1100;
use constant Hangul_LIni => 0x1100;
use constant Hangul_LFin => 0x1159;
use constant Hangul_LFill => 0x115F;
use constant Hangul_VBase => 0x1161;
use constant Hangul_VIni => 0x1160; # from Vowel Filler
use constant Hangul_VFin => 0x11A2;
use constant Hangul_TBase => 0x11A7; # from "no-final" codepoint
use constant Hangul_TIni => 0x11A8;
use constant Hangul_TFin => 0x11F9;
use constant Hangul_TCount => 28;
use constant Hangul_NCount => 588;
use constant Hangul_SBase => 0xAC00;
use constant Hangul_SIni => 0xAC00;
use constant Hangul_SFin => 0xD7A3;
use constant CJK_UidIni => 0x4E00;
use constant CJK_UidFin => 0x9FA5;
# End of the unified ideograph range used when UCA_Version >= 14
# (see _isUIdeo and _derivCE_14).
use constant CJK_UidF41 => 0x9FBB;
use constant CJK_ExtAIni => 0x3400;
use constant CJK_ExtAFin => 0x4DB5;
use constant CJK_ExtBIni => 0x20000;
use constant CJK_ExtBFin => 0x2A6D6;
use constant BMP_Max => 0xFFFF;
# Logical_Order_Exception in PropList.txt
# new() uses this as the default 'rearrange' list when UCA_Version <= 11.
my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ];
# UCA tracking version that new() falls back to when the caller does not
# pass a UCA_Version parameter.
sub UCA_Version {
    return "14";
}
# Version string of the Unicode Standard this implementation is based on.
sub Base_Unicode_Version {
    return "4.1.0";
}
######
# Convert a list of Unicode code points into a string.
sub pack_U {
    my @code_points = @_;
    return pack 'U*', @code_points;
}
# Convert a string into the list of its Unicode code points.
# An empty pack('U*') result is appended to the argument before it is
# unpacked.
sub unpack_U {
    my $string   = shift(@_);
    my $prepared = $string . pack('U*');
    return unpack 'U*', $prepared;
}
######
# Names accepted for the 'variable' parameter. Only the keys matter (all
# values are undef); checkCollator() tests them with exists().
my (%VariableOK);
@VariableOK{ qw/
blanked non-ignorable shifted shift-trimmed
/ } = (); # keys lowercased
# Parameters that change() is allowed to modify.
our @ChangeOK = qw/
alternate backwards level normalization rearrange
katakana_before_hiragana upper_before_lower
overrideHangul overrideCJK preprocess UCA_Version
hangul_terminator variable
/;
# Parameters and internal fields that change() refuses to modify.
our @ChangeNG = qw/
entry mapping table maxlength
ignoreChar ignoreName undefChar undefName variableTable
versionTable alternateTable backwardsTable forwardsTable rearrangeTable
derivCode normCode rearrangeHash
backwardsFlag
/;
# The hash key 'ignored' is deleted at v 0.21.
# The hash key 'isShift' is deleted at v 0.23.
# The hash key 'combining' is deleted at v 0.24.
# The hash key 'entries' is deleted at v 0.30.
# The hash key 'L3_ignorable' is deleted at v 0.40.
# Return the version string recorded from the table's "@version" line,
# or the string 'unknown' when none was recorded.
sub version {
    my ($self) = @_;
    my $table_version = $self->{versionTable};
    return $table_version ? $table_version : 'unknown';
}
# Lookup hashes keyed by the names in @ChangeOK / @ChangeNG. Every value is
# undef; change() only tests the keys with exists().
my (%ChangeOK, %ChangeNG);
@ChangeOK{ @ChangeOK } = ();
@ChangeNG{ @ChangeNG } = ();
##
## %old_values or $self = change(key => value, ...)
##
## Modify tailoring parameters of an existing collator.
## Keys listed in @ChangeOK are stored in the object, keys listed in
## @ChangeNG make the call croak, and any other key is ignored.
## checkCollator() is run afterwards. In list context the previous values
## of the changed keys are returned; otherwise the collator itself.
##
sub change {
    my ($self, %param) = @_;
    my %previous;

    # 'alternate' is an alias of 'variable': drop it when both are given,
    # otherwise let it supply the value of 'variable'.
    if (exists $param{alternate}) {
        if (exists $param{variable}) {
            delete $param{alternate};
        }
        else {
            $param{variable} = $param{alternate};
        }
    }

    for my $key (keys %param) {
        if (exists $ChangeOK{$key}) {
            $previous{$key} = $self->{$key};
            $self->{$key}   = $param{$key};
            next;
        }
        exists $ChangeNG{$key}
            and croak "change of $key via change() is not allowed!";
        # anything else is silently ignored
    }

    $self->checkCollator();
    return wantarray ? %previous : $self;
}
# Croak unless $level lies within MinLevel..MaxLevel.
# $key ('level' or 'backwards') names the parameter being validated and
# is only used in the error message.
sub _checkLevel {
    my ($level, $key) = @_;

    unless (MinLevel <= $level) {
        croak sprintf(
            "Illegal level %d (in value for key '%s') lower than %d.",
            $level, $key, MinLevel
        );
    }
    unless ($level <= MaxLevel) {
        croak sprintf(
            "Unsupported level %d (in value for key '%s') higher than %d.",
            $level, $key, MaxLevel
        );
    }
    return 1;
}
# Dispatch table: UCA version number => routine that derives collation
# elements for that version. checkCollator() croaks for a version that has
# no entry here.
my %DerivCode = (
8 => \&_derivCE_8,
9 => \&_derivCE_9,
11 => \&_derivCE_9, # 11 == 9
14 => \&_derivCE_14,
);
##
## Validate the tailoring parameters stored in $self and derive the internal
## fields that depend on them: derivCode, variable/alternate, backwardsFlag,
## rearrangeHash and normCode. Croaks on an invalid parameter.
## Called from new() and change(). Returns nothing.
##
sub checkCollator {
my $self = shift;
_checkLevel($self->{level}, "level");
# Pick the routine deriving collation elements for this UCA version.
$self->{derivCode} = $DerivCode{ $self->{UCA_Version} }
or croak "Illegal UCA version (passed $self->{UCA_Version}).";
# 'variable' falls back to 'alternate', then to the values recorded in
# variableTable / alternateTable, and finally to 'shifted'.
$self->{variable} ||= $self->{alternate} || $self->{variableTable} ||
$self->{alternateTable} || 'shifted';
# Keep both names in sync, lowercased.
$self->{variable} = $self->{alternate} = lc($self->{variable});
exists $VariableOK{ $self->{variable} }
or croak "$PACKAGE unknown variable parameter name: $self->{variable}";
# backwardsFlag is a bit mask with bit (1 << level) set for every level
# that is to be compared backwards.
if (! defined $self->{backwards}) {
$self->{backwardsFlag} = 0;
}
elsif (! ref $self->{backwards}) {
# a single level number
_checkLevel($self->{backwards}, "backwards");
$self->{backwardsFlag} = 1 << $self->{backwards};
}
else {
# a reference to a list of level numbers; duplicates are collapsed
# through %level so that each bit is added only once
my %level;
$self->{backwardsFlag} = 0;
for my $b (@{ $self->{backwards} }) {
_checkLevel($b, "backwards");
$level{$b} = 1;
}
for my $v (sort keys %level) {
$self->{backwardsFlag} += 1 << $v;
}
}
defined $self->{rearrange} or $self->{rearrange} = [];
ref $self->{rearrange}
or croak "$PACKAGE: list for rearrangement must be store in ARRAYREF";
# keys of $self->{rearrangeHash} are $self->{rearrange}.
# It stays undef when the rearrange list is empty.
$self->{rearrangeHash} = undef;
if (@{ $self->{rearrange} }) {
@{ $self->{rearrangeHash} }{ @{ $self->{rearrange} } } = ();
}
# normCode stays undef when normalization is undef or 'prenormalized'.
$self->{normCode} = undef;
if (defined $self->{normalization}) {
eval { require Unicode::Normalize };
$@ and croak "Unicode::Normalize is required to normalize strings";
$CVgetCombinClass ||= \&Unicode::Normalize::getCombinClass;
if ($self->{normalization} =~ /^(?:NF)D\z/) { # tweak for default
$self->{normCode} = \&Unicode::Normalize::NFD;
}
elsif ($self->{normalization} ne 'prenormalized') {
my $norm = $self->{normalization};
$self->{normCode} = sub {
Unicode::Normalize::normalize($norm, shift);
};
# Call it once on an empty string; a failure here is reported as an
# unknown normalization form name.
eval { $self->{normCode}->("") }; # try
$@ and croak "$PACKAGE unknown normalization form name: $norm";
}
}
return;
}
##
## $Collator = Unicode::Collate->new(%tailoring)
##
## Build a collator from the given tailoring parameters: read the table
## file (unless "table => undef" was passed), parse the 'entry' lines,
## fill in defaults for the parameters that were not given, and validate
## the result with checkCollator().
##
sub new
{
    my ($class, @args) = @_;
    my $self = bless { @args }, $class;

    # If undef is passed explicitly, no file is read.
    exists $self->{table} or $self->{table} = $KeyFile;
    if (defined $self->{table}) {
        $self->read_table();
    }

    # Every non-empty line of 'entry' is parsed like a table line.
    if ($self->{entry}) {
        while ($self->{entry} =~ /([^\n]+)/g) {
            $self->parseEntry($1);
        }
    }

    $self->{level}       ||= MaxLevel;
    $self->{UCA_Version} ||= UCA_Version();

    for my $override (qw(overrideHangul overrideCJK)) {
        $self->{$override} = FALSE unless exists $self->{$override};
    }

    unless (exists $self->{normalization}) {
        $self->{normalization} = 'NFD';
    }

    # Default rearrangement: the list from the table if there is one,
    # else the built-in list for UCA versions up to 11, else none.
    unless (exists $self->{rearrange}) {
        if ($self->{rearrangeTable}) {
            $self->{rearrange} = $self->{rearrangeTable};
        }
        elsif ($self->{UCA_Version} <= 11) {
            $self->{rearrange} = $DefaultRearrange;
        }
        else {
            $self->{rearrange} = [];
        }
    }

    unless (exists $self->{backwards}) {
        $self->{backwards} = $self->{backwardsTable};
    }

    $self->checkCollator();
    return $self;
}
##
## Locate the table file named by $self->{table} under Unicode/Collate in
## the @INC directories (the first one that can be opened wins) and load it.
## Croaks when the file cannot be opened in any @INC directory.
##
## Lines starting with '#' are skipped. Lines starting with '@' are table
## directives (version, variable, alternate, backwards, forwards,
## rearrange) and are recorded in the corresponding *Table fields.
## Every other line is handed to parseEntry().
##
sub read_table {
    my $self = shift;

    my($f, $fh);
    foreach my $d (@INC) {
        $f = File::Spec->catfile($d, @Path, $self->{table});
        # Three-argument open: the path is always taken as the name of a
        # file to read, so characters in the table name can never be
        # interpreted as an open mode or a pipe.
        last if open($fh, '<', $f);
        $f = undef;
    }
    if (!defined $f) {
        $f = File::Spec->catfile(@Path, $self->{table});
        croak("$PACKAGE: Can't locate $f in \@INC (\@INC contains: @INC)");
    }

    while (my $line = <$fh>) {
        next if $line =~ /^\s*#/;
        unless ($line =~ s/^\s*\@//) {
            $self->parseEntry($line);
            next;
        }

        # matched ^\s*\@
        # For the single-valued directives, a value already present in
        # $self is kept (||=).
        if ($line =~ /^version\s*(\S*)/) {
            $self->{versionTable} ||= $1;
        }
        elsif ($line =~ /^variable\s+(\S*)/) { # since UTS #10-9
            $self->{variableTable} ||= $1;
        }
        elsif ($line =~ /^alternate\s+(\S*)/) { # till UTS #10-8
            $self->{alternateTable} ||= $1;
        }
        elsif ($line =~ /^backwards\s+(\S*)/) {
            push @{ $self->{backwardsTable} }, $1;
        }
        elsif ($line =~ /^forwards\s+(\S*)/) { # perhaps no use
            push @{ $self->{forwardsTable} }, $1;
        }
        elsif ($line =~ /^rearrange\s+(.*)/) { # (\S*) is NG
            push @{ $self->{rearrangeTable} }, _getHexArray($1);
        }
    }
    close $fh;
}
##
## get $line, parse it, and write an entry in $self
##
## A line has the form "<charList> ; <collElement>... # name".
## Lines that do not start with a hexadecimal digit are ignored.
## The entry is stored in $self->{mapping} under the JCPS of <charList>;
## for a contraction (more than one code point), $self->{maxlength} of the
## first code point is raised to the length of the contraction.
## Croaks when the ';' separator (or the element after it) is missing.
##
sub parseEntry
{
    my $self = shift;
    my $line = shift;
    my($entry, @uv, @key);

    return if $line !~ /^\s*[0-9A-Fa-f]/;

    # removes comment and gets name
    # A line without a comment gets the empty name, so the name tests below
    # never match against an undefined value (which would emit
    # "uninitialized" warnings).
    my $name = '';
    $name = $1
        if $line =~ s/[#%]\s*(.*)//;
    return if defined $self->{undefName} && $name =~ /$self->{undefName}/;

    # gets element
    my($e, $k) = split /;/, $line;
    croak "Wrong Entry: <charList> must be separated by ';' from <collElement>"
        if ! $k;

    @uv = _getHexArray($e);
    return if !@uv;

    $entry = join(CODE_SEP, @uv); # in JCPS

    if (defined $self->{undefChar} || defined $self->{ignoreChar}) {
        my $ele = pack_U(@uv);

        # regarded as if it were not entered in the table
        return
            if defined $self->{undefChar} && $ele =~ /$self->{undefChar}/;

        # replaced as completely ignorable
        $k = '[.0000.0000.0000.0000]'
            if defined $self->{ignoreChar} && $ele =~ /$self->{ignoreChar}/;
    }

    # replaced as completely ignorable
    $k = '[.0000.0000.0000.0000]'
        if defined $self->{ignoreName} && $name =~ /$self->{ignoreName}/;

    my $is_L3_ignorable = TRUE;

    foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
        my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
        my @wt = _getHexArray($arr);
        push @key, pack(VCE_TEMPLATE, $var, @wt);
        $is_L3_ignorable = FALSE
            if $wt[0] || $wt[1] || $wt[2];
        # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
        # is completely ignorable.
        # For expansion, an entry $is_L3_ignorable
        # if and only if "all" CEs are [.0000.0000.0000].
    }

    # A completely ignorable entry is stored as an empty list of CEs.
    $self->{mapping}{$entry} = $is_L3_ignorable ? [] : \@key;

    if (@uv > 1) {
        (!$self->{maxlength}{$uv[0]} || $self->{maxlength}{$uv[0]} < @uv)
            and $self->{maxlength}{$uv[0]} = @uv;
    }
}
##
## VCE = _varCE(variable term, VCE)
##
## Apply the variable-weighting option $vbl ('non-ignorable', 'blanked',
## 'shifted' or 'shift-trimmed') to one packed collation element and
## return the resulting packed element.
##
sub _varCE
{
    my ($vbl, $vce) = @_;

    # non-ignorable: the element is used as it is
    return $vce if $vbl eq 'non-ignorable';

    my ($var, @wt) = unpack VCE_TEMPLATE, $vce;
    my $blanked = $vbl eq 'blanked';

    if ($var) {
        # variable element: levels 1 to 3 are zeroed; level 4 keeps its own
        # weight for 'blanked' and takes the level-1 weight otherwise
        my $fourth = $blanked ? $wt[3] : $wt[0];
        return pack(VCE_TEMPLATE, $var, 0, 0, 0, $fourth);
    }

    # non-variable element under 'blanked' is left untouched
    return $vce if $blanked;

    # 'shifted' gives Shift4Wt at level 4 to an element with any non-zero
    # weight sum at levels 1 to 3; everything else gets 0 there
    my $fourth = $vbl eq 'shifted' && $wt[0]+$wt[1]+$wt[2] ? Shift4Wt : 0;
    return pack(VCE_TEMPLATE, $var, @wt[0..2], $fourth);
}
# Compute the sort key of the arguments with getSortKey() and return it in
# the readable form produced by visualizeSortKey().
sub viewSortKey
{
    my ($self, @args) = @_;
    my @key = $self->getSortKey(@args);
    return $self->visualizeSortKey(@key);
}
# Render a binary sort key as "[hhhh hhhh | hhhh ...]": every 16-bit weight
# is printed as four hexadecimal digits and the zero weights (level
# separators) are shown as '|'. For UCA_Version <= 8 the spaces next to a
# separator are removed as well.
sub visualizeSortKey
{
    my ($self, $key) = @_;

    my @hex  = map { sprintf("%04X", $_) } unpack(KEY_TEMPLATE, $key);
    my $view = join " ", @hex;

    if ($self->{UCA_Version} <= 8) {
        $view =~ s/ ?0000 ?/|/g;
    }
    else {
        $view =~ s/\b0000\b/|/g;
    }
    return "[$view]";
}
##
## arrayref of JCPS = splitEnt(string to be collated)
## arrayref of arrayref[JCPS, ini_pos, fin_pos] = splitEnt(string, true)
##
## Split a string into collation units: single code points or contractions
## found in the mapping table, each expressed as a JCPS.
## With a true second argument ($wLen) character positions are recorded;
## in that mode preprocessing and normalization make the call croak and
## rearrangement is skipped.
##
sub splitEnt
{
my $self = shift;
my $wLen = $_[1];
my $code = $self->{preprocess};
my $norm = $self->{normCode};
my $map = $self->{mapping};
my $max = $self->{maxlength};
my $reH = $self->{rearrangeHash};
# true only for UCA versions 9 to 11
my $ver9 = $self->{UCA_Version} >= 9 && $self->{UCA_Version} <= 11;
my ($str, @buf);
if ($wLen) {
$code and croak "Preprocess breaks character positions. "
. "Don't use with index(), match(), etc.";
$norm and croak "Normalization breaks character positions. "
. "Don't use with index(), match(), etc.";
$str = $_[0];
}
else {
$str = $_[0];
$str = &$code($str) if ref $code;
$str = &$norm($str) if ref $norm;
}
# get array of Unicode code point of string.
my @src = unpack_U($str);
# rearrangement:
# Character positions are not kept if rearranged,
# then neglected if $wLen is true.
if ($reH && ! $wLen) {
for (my $i = 0; $i < @src; $i++) {
# swap a rearranging character with its successor, then skip the pair
if (exists $reH->{ $src[$i] } && $i + 1 < @src) {
($src[$i], $src[$i+1]) = ($src[$i+1], $src[$i]);
$i++;
}
}
}
# remove a code point marked as a completely ignorable.
# (illegal code points are removed always; code points whose mapping is an
# empty list are removed here only when $ver9 is true)
for (my $i = 0; $i < @src; $i++) {
$src[$i] = undef
if _isIllegal($src[$i]) || ($ver9 &&
$map->{ $src[$i] } && @{ $map->{ $src[$i] } } == 0);
}
for (my $i = 0; $i < @src; $i++) {
my $jcps = $src[$i];
# skip removed code point
if (! defined $jcps) {
# extend the end position of the previous unit over the removed one
if ($wLen && @buf) {
$buf[-1][2] = $i + 1;
}
next;
}
my $i_orig = $i;
# find contraction
# ($max holds, per first code point, the length of the longest
# contraction starting with it)
if ($max->{$jcps}) {
my $temp_jcps = $jcps;
my $jcpsLen = 1;
my $maxLen = $max->{$jcps};
# try longer and longer candidates; the longest one found in the
# mapping wins
for (my $p = $i + 1; $jcpsLen < $maxLen && $p < @src; $p++) {
next if ! defined $src[$p];
$temp_jcps .= CODE_SEP . $src[$p];
$jcpsLen++;
if ($map->{$temp_jcps}) {
$jcps = $temp_jcps;
$i = $p;
}
}
# not-contiguous contraction with Combining Char (cf. UTS#10, S2.1).
# This process requires Unicode::Normalize.
# If "normalization" is undef, here should be skipped *always*
# (in spite of bool value of $CVgetCombinClass),
# since canonical ordering cannot be expected.
# Blocked combining character should not be contracted.
if ($self->{normalization})
# $self->{normCode} is false in the case of "prenormalized".
{
my $preCC = 0;
my $curCC = 0;
for (my $p = $i + 1; $p < @src; $p++) {
next if ! defined $src[$p];
$curCC = $CVgetCombinClass->($src[$p]);
# stop at the first character with combining class 0
last unless $curCC;
my $tail = CODE_SEP . $src[$p];
if ($preCC != $curCC && $map->{$jcps.$tail}) {
# absorb the combining character into the contraction and
# mark it as consumed
$jcps .= $tail;
$src[$p] = undef;
} else {
$preCC = $curCC;
}
}
}
}
# skip completely ignorable
if ($map->{$jcps} && @{ $map->{$jcps} } == 0) {
if ($wLen && @buf) {
$buf[-1][2] = $i + 1;
}
next;
}
push @buf, $wLen ? [$jcps, $i_orig, $i + 1] : $jcps;
}
return \@buf;
}
##
## list of VCE = getWt(JCPS)
##
## Return the packed collation elements for one JCPS, after applying the
## collator's 'variable' option through _varCE().
## A JCPS found in the mapping table uses its table entry. Otherwise the
## elements are derived: Hangul syllables and CJK unified ideographs honour
## overrideHangul / overrideCJK, and everything else goes through the
## derivCode routine. Returns an empty list for an undefined argument.
##
sub getWt
{
my $self = shift;
my $u = shift;
my $vbl = $self->{variable};
my $map = $self->{mapping};
my $der = $self->{derivCode};
return if !defined $u;
return map(_varCE($vbl, $_), @{ $map->{$u} })
if $map->{$u};
# JCPS must not be a contraction, then it's a code point.
if (Hangul_SIni <= $u && $u <= Hangul_SFin) {
my $hang = $self->{overrideHangul};
my @hangulCE;
if ($hang) {
# true value: user-supplied coderef returning lists of weights
@hangulCE = map(pack(VCE_TEMPLATE, NON_VAR, @$_), &$hang($u));
}
elsif (!defined $hang) {
# undef: treat the syllable like any other unlisted code point
@hangulCE = $der->($u);
}
else {
# defined but false (the default set by new()): decompose the syllable
# into jamo and weight the jamo, preferring contractions of them that
# exist in the table
my $max = $self->{maxlength};
my @decH = _decompHangul($u);
if (@decH == 2) {
my $contract = join(CODE_SEP, @decH);
@decH = ($contract) if $map->{$contract};
} else { # must be <@decH == 3>
if ($max->{$decH[0]}) {
my $contract = join(CODE_SEP, @decH);
if ($map->{$contract}) {
@decH = ($contract);
} else {
$contract = join(CODE_SEP, @decH[0,1]);
$map->{$contract} and @decH = ($contract, $decH[2]);
}
# even if V's ignorable, LT contraction is not supported.
# If such a situation were required, NFD should be used.
}
if (@decH == 3 && $max->{$decH[1]}) {
my $contract = join(CODE_SEP, @decH[1,2]);
$map->{$contract} and @decH = ($decH[0], $contract);
}
}
@hangulCE = map({
$map->{$_} ? @{ $map->{$_} } : $der->($_);
} @decH);
}
return map _varCE($vbl, $_), @hangulCE;
}
elsif (_isUIdeo($u, $self->{UCA_Version})) {
my $cjk = $self->{overrideCJK};
# true value: user-supplied coderef; defined-but-false with
# UCA_Version <= 8 and a code point below 0x10000: _uideoCE_8;
# otherwise the derivCode routine
return map _varCE($vbl, $_),
$cjk
? map(pack(VCE_TEMPLATE, NON_VAR, @$_), &$cjk($u))
: defined $cjk && $self->{UCA_Version} <= 8 && $u < 0x10000
? _uideoCE_8($u)
: $der->($u);
}
else {
return map _varCE($vbl, $_), $der->($u);
}
}
##
## string sortkey = getSortKey(string arg)
##
## Build the binary sort key of a string: the string is split into
## collation units, their collation elements are collected, the non-zero
## weights are gathered level by level (up to $self->{level}), optional
## tertiary-weight and backwards adjustments are applied, and the four
## level lists are packed and joined with LEVEL_SEP.
##
sub getSortKey
{
my $self = shift;
my $lev = $self->{level};
my $rEnt = $self->splitEnt(shift); # get an arrayref of JCPS
# true when ignorables following a variable element are to be dropped
my $v2i = $self->{UCA_Version} >= 9 &&
$self->{variable} ne 'non-ignorable';
my @buf; # weight arrays
if ($self->{hangul_terminator}) {
# Insert a terminator element after each run of Hangul: $preHST and
# $curHST hold the syllable types (strings of L, V, T) of the previous
# and the current unit.
my $preHST = '';
foreach my $jcps (@$rEnt) {
# weird things like VL, TL-contraction are not considered!
my $curHST = '';
foreach my $u (split /;/, $jcps) {
$curHST .= getHST($u);
}
if ($preHST && !$curHST || # hangul before non-hangul
$preHST =~ /L\z/ && $curHST =~ /^T/ ||
$preHST =~ /V\z/ && $curHST =~ /^L/ ||
$preHST =~ /T\z/ && $curHST =~ /^[LV]/) {
push @buf, $self->getWtHangulTerm();
}
$preHST = $curHST;
push @buf, $self->getWt($jcps);
}
$preHST # end at hangul
and push @buf, $self->getWtHangulTerm();
}
else {
foreach my $jcps (@$rEnt) {
push @buf, $self->getWt($jcps);
}
}
# make sort key
my @ret = ([],[],[],[]);
my $last_is_variable;
foreach my $vwt (@buf) {
my($var, @wt) = unpack(VCE_TEMPLATE, $vwt);
# "Ignorable (L1, L2) after Variable" since track. v. 9
if ($v2i) {
if ($var) {
$last_is_variable = TRUE;
}
elsif (!$wt[0]) { # ignorable
next if $last_is_variable;
}
else {
$last_is_variable = FALSE;
}
}
# only non-zero weights are stored, and only for levels 1..$lev
foreach my $v (0..$lev-1) {
0 < $wt[$v] and push @{ $ret[$v] }, $wt[$v];
}
}
# modification of tertiary weights
# ($w aliases the array elements, so the weights are changed in place)
if ($self->{upper_before_lower}) {
foreach my $w (@{ $ret[2] }) {
if (0x8 <= $w && $w <= 0xC) { $w -= 6 } # lower
elsif (0x2 <= $w && $w <= 0x6) { $w += 6 } # upper
elsif ($w == 0x1C) { $w += 1 } # square upper
elsif ($w == 0x1D) { $w -= 1 } # square lower
}
}
if ($self->{katakana_before_hiragana}) {
foreach my $w (@{ $ret[2] }) {
if (0x0F <= $w && $w <= 0x13) { $w -= 2 } # katakana
elsif (0x0D <= $w && $w <= 0x0E) { $w += 5 } # hiragana
}
}
# reverse the weights of every level whose bit is set in backwardsFlag
if ($self->{backwardsFlag}) {
for (my $v = MinLevel; $v <= MaxLevel; $v++) {
if ($self->{backwardsFlag} & (1 << $v)) {
@{ $ret[$v-1] } = reverse @{ $ret[$v-1] };
}
}
}
join LEVEL_SEP, map pack(KEY_TEMPLATE, @$_), @ret;
}
##
## int compare = cmp(string a, string b)
## bool result = eq/ne/lt/le/gt/ge(string a, string b)
##
## Each method builds the sort keys of its two string arguments and
## compares the keys with the string operator of the same name.
##
sub cmp {
    my ($self, $lhs, $rhs) = @_;
    return $self->getSortKey($lhs) cmp $self->getSortKey($rhs);
}
sub eq {
    my ($self, $lhs, $rhs) = @_;
    return $self->getSortKey($lhs) eq $self->getSortKey($rhs);
}
sub ne {
    my ($self, $lhs, $rhs) = @_;
    return $self->getSortKey($lhs) ne $self->getSortKey($rhs);
}
sub lt {
    my ($self, $lhs, $rhs) = @_;
    return $self->getSortKey($lhs) lt $self->getSortKey($rhs);
}
sub le {
    my ($self, $lhs, $rhs) = @_;
    return $self->getSortKey($lhs) le $self->getSortKey($rhs);
}
sub gt {
    my ($self, $lhs, $rhs) = @_;
    return $self->getSortKey($lhs) gt $self->getSortKey($rhs);
}
sub ge {
    my ($self, $lhs, $rhs) = @_;
    return $self->getSortKey($lhs) ge $self->getSortKey($rhs);
}
##
## list[strings] sorted = sort(list[strings] arg)
##
## Sort strings by their sort keys. Each key is computed once, paired with
## its string, and the pairs are ordered by comparing the keys as strings.
##
sub sort {
    my ($obj, @strings) = @_;

    my @keyed   = map { [ $obj->getSortKey($_), $_ ] } @strings;
    my @ordered = sort { $a->[0] cmp $b->[0] } @keyed;

    return map { $_->[1] } @ordered;
}
# Derive the two packed collation elements of a code point that has no
# table entry, for UCA version 14. The base of the first primary weight
# depends on whether the code point lies in the CJK unified range (up to
# CJK_UidF41), in CJK extension A or B, or elsewhere.
sub _derivCE_14 {
    my ($cp) = @_;

    my $base;
    if (CJK_UidIni <= $cp && $cp <= CJK_UidF41) {
        $base = 0xFB40;    # CJK
    }
    elsif (CJK_ExtAIni <= $cp && $cp <= CJK_ExtAFin ||
           CJK_ExtBIni <= $cp && $cp <= CJK_ExtBFin) {
        $base = 0xFB80;    # CJK ext.
    }
    else {
        $base = 0xFBC0;    # others
    }

    # the code point is split at bit 15 over the two primary weights
    my $first  = $base + ($cp >> 15);
    my $second = ($cp & 0x7FFF) | 0x8000;

    return (
        pack(VCE_TEMPLATE, NON_VAR, $first, Min2Wt, Min3Wt, $cp),
        pack(VCE_TEMPLATE, NON_VAR, $second, 0, 0, $cp),
    );
}
# Derive the two packed collation elements of a code point that has no
# table entry, for UCA versions 9 and 11. The base of the first primary
# weight depends on whether the code point lies in the CJK unified range
# (up to CJK_UidFin), in CJK extension A or B, or elsewhere.
sub _derivCE_9 {
    my ($cp) = @_;

    my $base;
    if (CJK_UidIni <= $cp && $cp <= CJK_UidFin) {
        $base = 0xFB40;    # CJK
    }
    elsif (CJK_ExtAIni <= $cp && $cp <= CJK_ExtAFin ||
           CJK_ExtBIni <= $cp && $cp <= CJK_ExtBFin) {
        $base = 0xFB80;    # CJK ext.
    }
    else {
        $base = 0xFBC0;    # others
    }

    # the code point is split at bit 15 over the two primary weights
    my $first  = $base + ($cp >> 15);
    my $second = ($cp & 0x7FFF) | 0x8000;

    return (
        pack(VCE_TEMPLATE, NON_VAR, $first, Min2Wt, Min3Wt, $cp),
        pack(VCE_TEMPLATE, NON_VAR, $second, 0, 0, $cp),
    );
}
# Derive the two packed collation elements of a code point that has no
# table entry, for UCA version 8: a fixed base of 0xFF80 and the literal
# weights 2 and 1 at levels 2 and 3 of the first element.
sub _derivCE_8 {
    my ($cp) = @_;

    # the code point is split at bit 15 over the two primary weights
    my $first  = 0xFF80 + ($cp >> 15);
    my $second = ($cp & 0x7FFF) | 0x8000;

    return (
        pack(VCE_TEMPLATE, NON_VAR, $first, 2, 1, $cp),
        pack(VCE_TEMPLATE, NON_VAR, $second, 0, 0, $cp),
    );
}
# Packed collation element of a unified ideograph for UCA version 8:
# the code point itself serves as the level-1 and the level-4 weight.
sub _uideoCE_8 {
    my ($cp) = @_;
    my @weights = ($cp, Min2Wt, Min3Wt, $cp);
    return pack(VCE_TEMPLATE, NON_VAR, @weights);
}
# True when the code point $cp is a CJK unified ideograph: in the main
# block (whose upper end is CJK_UidF41 for UCA version 14 and later,
# CJK_UidFin before that) or in extension A or B.
sub _isUIdeo {
    my ($cp, $uca_vers) = @_;

    # main block; the upper bound depends on the UCA version
    if (CJK_UidIni <= $cp) {
        my $in_main = $uca_vers >= 14
            ? ($cp <= CJK_UidF41)
            : ($cp <= CJK_UidFin);
        return $in_main if $in_main;
    }

    # extension A
    return 1 if CJK_ExtAIni <= $cp && $cp <= CJK_ExtAFin;

    # extension B decides the result
    return (CJK_ExtBIni <= $cp && $cp <= CJK_ExtBFin);
}
# Packed collation element of the Hangul terminator: a non-variable element
# whose level-1 weight is $self->{hangul_terminator} and whose other
# weights are zero, passed through _varCE() with the collator's 'variable'
# option.
sub getWtHangulTerm {
    my ($self) = @_;
    my $term_vce =
        pack(VCE_TEMPLATE, NON_VAR, $self->{hangul_terminator}, 0, 0, 0);
    return _varCE($self->{variable}, $term_vce);
}
##
## "hhhh hhhh hhhh" to (dddd, dddd, dddd)
##
## Every run of hexadecimal digits in the argument is converted to a
## number; all other characters act as separators.
##
sub _getHexArray {
    my ($text) = @_;
    my @runs = $text =~ /([0-9a-fA-F]+)/g;
    return map { hex $_ } @runs;
}
#
# Decompose a Hangul syllable into the code points of its leading
# consonant, vowel and (when there is one) trailing consonant.
# $syllable *must* be a Hangul syllable; check it before calling.
#
sub _decompHangul {
    my ($syllable) = @_;

    my $s_index = $syllable - Hangul_SBase;
    my $l_index = int( $s_index / Hangul_NCount);
    my $v_index = int(($s_index % Hangul_NCount) / Hangul_TCount);
    my $t_index = $s_index % Hangul_TCount;

    # a trailing index of 0 means "no trailing consonant"
    return (
        Hangul_LBase + $l_index,
        Hangul_VBase + $v_index,
        $t_index ? (Hangul_TBase + $t_index) : (),
    );
}
# True when $code cannot be collated: undefined (already removed), outside
# 0..0x10FFFF, a code point ending in FFFE or FFFF, a surrogate, or one of
# the non-characters 0xFDD0..0xFDEF.
sub _isIllegal {
    my ($code) = @_;

    return 1 if ! defined $code;                    # removed
    return 1 if $code < 0 || 0x10FFFF < $code;      # out of range
    return 1 if ($code & 0xFFFE) == 0xFFFE;         # ??FFF[EF] (cf. utf8.c)
    return 1 if 0xD800 <= $code && $code <= 0xDFFF; # unpaired surrogates

    # other non-characters decide the result
    return (0xFDD0 <= $code && $code <= 0xFDEF);
}
# Hangul Syllable Type
# Returns "L", "V" or "T" for a leading, vowel or trailing jamo, "LV" or
# "LVT" for a precomposed syllable without or with a trailing consonant,
# and "" for anything else.
sub getHST {
    my ($cp) = @_;

    return "L"
        if Hangul_LIni <= $cp && $cp <= Hangul_LFin || $cp == Hangul_LFill;
    return "V" if Hangul_VIni <= $cp && $cp <= Hangul_VFin;
    return "T" if Hangul_TIni <= $cp && $cp <= Hangul_TFin;
    return ""  unless Hangul_SIni <= $cp && $cp <= Hangul_SFin;

    # precomposed syllable: a non-zero trailing index means LVT
    my $t_index = ($cp - Hangul_SBase) % Hangul_TCount;
    return $t_index ? "LVT" : "LV";
}
##
## bool _nonIgnorAtLevel(arrayref weights, int level)
##
## TRUE when any weight from level MinLevel up to the given level is
## non-zero, FALSE otherwise. Returns nothing when the weights argument is
## undefined.
##
sub _nonIgnorAtLevel($$)
{
    my ($wt, $lv) = @_;
    defined $wt or return;

    my @nonzero = grep { $wt->[$_ - 1] != 0 } MinLevel .. $lv;
    return @nonzero ? TRUE : FALSE;
}
##
## bool _eqArray(
## arrayref of arrayref[weights] source,
## arrayref of arrayref[weights] substr,
## int level)
## * comparison of graphemes vs graphemes.
## Only the first @$substr graphemes of source are compared, and only the
## first "level" weights of each weight array.
## @$source >= @$substr must be true (check it before calling this).
## Returns 1 on equality and nothing otherwise.
##
sub _eqArray($$$)
{
    my ($source, $substr, $lev) = @_;
    my $last_level = $lev - 1;

    foreach my $g (0 .. $#{$substr}) {
        # Do the $g'th graphemes have the same number of weight arrays?
        my $n_source = @{ $source->[$g] };
        my $n_substr = @{ $substr->[$g] };
        return if $n_source != $n_substr;

        foreach my $w (0 .. $n_substr - 1) {
            foreach my $v (0 .. $last_level) {
                return
                    if $source->[$g][$w][$v] != $substr->[$g][$w][$v];
            }
        }
    }
    return 1;
}
##
## (int position, int length)
## int position = index(string, substring, position, [undoc'ed global])
##
## With "global" (only for the list context),
## returns list of arrayref[position, length].
##
## Without a match, returns NOMATCHPOS in scalar context and an empty list
## in list context.
##
sub index
{
my $self = shift;
my $str = shift;
my $len = length($str);
my $subE = $self->splitEnt(shift);
my $pos = @_ ? shift : 0;
$pos = 0 if $pos < 0;
my $grob = shift;
my $lev = $self->{level};
my $v2i = $self->{UCA_Version} >= 9 &&
$self->{variable} ne 'non-ignorable';
# A substring without any collation unit matches with length 0 at the
# (clamped) start position, or at every position from there on when global.
if (! @$subE) {
my $temp = $pos <= 0 ? 0 : $len <= $pos ? $len : $pos;
return $grob
? map([$_, 0], $temp..$len)
: wantarray ? ($temp,0) : $temp;
}
$len < $pos
and return wantarray ? () : NOMATCHPOS;
# Split the searched part of the string with positions recorded.
my $strE = $self->splitEnt($pos ? substr($str, $pos) : $str, TRUE);
@$strE
or return wantarray ? () : NOMATCHPOS;
my(@strWt, @iniPos, @finPos, @subWt, @g_ret);
my $last_is_variable;
# Build @subWt: one entry per grapheme of the substring, each a list of
# weight arrays.
for my $vwt (map $self->getWt($_), @$subE) {
my($var, @wt) = unpack(VCE_TEMPLATE, $vwt);
my $to_be_pushed = _nonIgnorAtLevel(\@wt,$lev);
# "Ignorable (L1, L2) after Variable" since track. v. 9
if ($v2i) {
if ($var) {
$last_is_variable = TRUE;
}
elsif (!$wt[0]) { # ignorable
$to_be_pushed = FALSE if $last_is_variable;
}
else {
$last_is_variable = FALSE;
}
}
# A non-variable element with a zero primary weight is attached to the
# preceding grapheme; anything else starts a new grapheme.
if (@subWt && !$var && !$wt[0]) {
push @{ $subWt[-1] }, \@wt if $to_be_pushed;
} else {
push @subWt, [ \@wt ];
}
}
# NOTE(review): $count is assigned here but not used anywhere in this sub.
my $count = 0;
my $end = @$strE - 1;
$last_is_variable = FALSE; # reuse
for (my $i = 0; $i <= $end; ) { # no $i++
my $found_base = 0;
# fetch a grapheme
while ($i <= $end && $found_base == 0) {
for my $vwt ($self->getWt($strE->[$i][0])) {
my($var, @wt) = unpack(VCE_TEMPLATE, $vwt);
my $to_be_pushed = _nonIgnorAtLevel(\@wt,$lev);
# "Ignorable (L1, L2) after Variable" since track. v. 9
if ($v2i) {
if ($var) {
$last_is_variable = TRUE;
}
elsif (!$wt[0]) { # ignorable
$to_be_pushed = FALSE if $last_is_variable;
}
else {
$last_is_variable = FALSE;
}
}
if (@strWt && !$var && !$wt[0]) {
# attach to the previous grapheme and extend its end position
push @{ $strWt[-1] }, \@wt if $to_be_pushed;
$finPos[-1] = $strE->[$i][2];
} elsif ($to_be_pushed) {
# start a new grapheme; a second base produced by the same unit gets
# NOMATCHPOS as its start, and the previous end is invalidated too
push @strWt, [ \@wt ];
push @iniPos, $found_base ? NOMATCHPOS : $strE->[$i][1];
$finPos[-1] = NOMATCHPOS if $found_base;
push @finPos, $strE->[$i][2];
$found_base++;
}
# else ===> no-op
}
$i++;
}
# try to match
# (run while more graphemes are buffered than the substring has, or an
# equal number once the whole string has been consumed)
while ( @strWt > @subWt || (@strWt == @subWt && $i > $end) ) {
if ($iniPos[0] != NOMATCHPOS &&
$finPos[$#subWt] != NOMATCHPOS &&
_eqArray(\@strWt, \@subWt, $lev)) {
my $temp = $iniPos[0] + $pos;
if ($grob) {
push @g_ret, [$temp, $finPos[$#subWt] - $iniPos[0]];
# drop all but the last matched grapheme; the shifts below drop that
# one, so the next attempt starts after the match
splice @strWt, 0, $#subWt;
splice @iniPos, 0, $#subWt;
splice @finPos, 0, $#subWt;
}
else {
return wantarray
? ($temp, $finPos[$#subWt] - $iniPos[0])
: $temp;
}
}
shift @strWt;
shift @iniPos;
shift @finPos;
}
}
return $grob
? @g_ret
: wantarray ? () : NOMATCHPOS;
}
##
## scalarref to matching part = match(string, substring)
##
## In list context the matching part itself is returned instead of a
## reference to it. Returns nothing when the substring is not found.
##
sub match
{
    my $self = shift;

    my ($pos, $len) = $self->index($_[0], $_[1])
        or return;

    # A copy is taken on purpose: an lvalue ref \substr should be avoided,
    # since its value is affected by modification of its referent.
    my $matched = substr($_[0], $pos, $len);
    return $matched if wantarray;
    return \$matched;
}
##
## list of matching parts = gmatch(string, substring)
##
## Collects every [position, length] pair reported by index() in global
## mode and returns the corresponding parts of the string.
##
sub gmatch
{
    my ($self, $str, $sub) = @_;
    my @hits = $self->index($str, $sub, 0, 'g');
    return map { substr($str, $_->[0], $_->[1]) } @hits;
}
##
## bool subst'ed = subst(string, substring, replace)
##
## Replace the first match of substring in string, modifying the caller's
## string in place. "replace" is either the replacement string or a coderef
## that is called with the matching part and returns the replacement.
## Returns TRUE when a replacement was made, FALSE otherwise.
##
sub subst
{
    my $self = shift;
    my $code = ref($_[2]) eq 'CODE' ? $_[2] : FALSE;

    my ($pos, $len) = $self->index($_[0], $_[1])
        or return FALSE;

    my $replacement = $_[2];
    if ($code) {
        # hand the callback a copy of the matching part
        my $mat = substr($_[0], $pos, $len);
        $replacement = $code->($mat);
    }

    # $_[0] aliases the caller's variable, so this edits the original string
    substr($_[0], $pos, $len, $replacement);
    return TRUE;
}
##
## int count = gsubst(string, substring, replace)
##
## Replace every match of substring in string, modifying the caller's
## string in place. "replace" is either the replacement string or a coderef
## that is called with each matching part and returns its replacement.
## Returns the number of replacements made.
##
sub gsubst
{
    my $self = shift;
    my $code = ref($_[2]) eq 'CODE' ? $_[2] : FALSE;

    my @hits  = $self->index($_[0], $_[1], 0, 'g');
    my $count = 0;

    # Replacement is carried out from the end, so that the positions of
    # the matches still to be processed stay valid.
    foreach my $hit (reverse @hits) {
        my ($pos, $len) = @{$hit};

        my $replacement = $_[2];
        if ($code) {
            # hand the callback a copy of the matching part
            my $mat = substr($_[0], $pos, $len);
            $replacement = $code->($mat);
        }

        # $_[0] aliases the caller's variable
        substr($_[0], $pos, $len, $replacement);
        $count++;
    }
    return $count;
}
1;
__END__
=head1 NAME
Unicode::Collate - Unicode Collation Algorithm
=head1 SYNOPSIS
use Unicode::Collate;
#construct
$Collator = Unicode::Collate->new(%tailoring);
#sort
@sorted = $Collator->sort(@not_sorted);
#compare
$result = $Collator->cmp($a, $b); # returns 1, 0, or -1.
# If %tailoring is false (i.e. empty),
# $Collator should do the default collation.
=head1 DESCRIPTION
This module is an implementation of Unicode Technical Standard #10
(a.k.a. UTS #10) - Unicode Collation Algorithm (a.k.a. UCA).
=head2 Constructor and Tailoring
The C<new> method returns a collator object.
$Collator = Unicode::Collate->new(
UCA_Version => $UCA_Version,
alternate => $alternate, # deprecated: use of 'variable' is recommended.
backwards => $levelNumber, # or \@levelNumbers
entry => $element,
hangul_terminator => $term_primary_weight,
ignoreName => qr/$ignoreName/,
ignoreChar => qr/$ignoreChar/,
katakana_before_hiragana => $bool,
level => $collationLevel,
normalization => $normalization_form,
overrideCJK => \&overrideCJK,
overrideHangul => \&overrideHangul,
preprocess => \&preprocess,
rearrange => \@charList,
table => $filename,
undefName => qr/$undefName/,
undefChar => qr/$undefChar/,
upper_before_lower => $bool,
variable => $variable,
);
=over 4
=item UCA_Version
If the tracking version number of UCA is given,
behavior of that tracking version is emulated on collating.
If omitted, the return value of C<UCA_Version()> is used.
C<UCA_Version()> should return the latest tracking version supported.
The supported tracking version: 8, 9, 11, or 14.
UCA Unicode Standard DUCET (@version)
---------------------------------------------------
8 3.1 3.0.1 (3.0.1d9)
9 3.1 with Corrigendum 3 3.1.1 (3.1.1)
11 4.0 4.0.0 (4.0.0)
14 4.1.0 4.1.0 (4.1.0)
Note: Recent UTS #10 renames "Tracking Version" to "Revision."
=item alternate
-- see 3.2.2 Alternate Weighting, version 8 of UTS #10
For backward compatibility, C<alternate> (old name) can be used
as an alias for C<variable>.
=item backwards
-- see 3.1.2 French Accents, UTS #10.
backwards => $levelNumber or \@levelNumbers
Weights in reverse order; ex. level 2 (diacritic ordering) in French.
If omitted, forwards at all the levels.
=item entry
-- see 3.1 Linguistic Features; 3.2.1 File Format, UTS #10.
If the same character (or a sequence of characters) exists
in the collation element table through C<table>,
mapping to collation elements is overridden.
If it does not exist, the mapping is defined additionally.
entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt)
0063 0068 ; [.0E6A.0020.0002.0063] # ch
0043 0068 ; [.0E6A.0020.0007.0043] # Ch
0043 0048 ; [.0E6A.0020.0008.0043] # CH
006C 006C ; [.0F4C.0020.0002.006C] # ll
004C 006C ; [.0F4C.0020.0007.004C] # Ll
004C 004C ; [.0F4C.0020.0008.004C] # LL
00F1 ; [.0F7B.0020.0002.00F1] # n-tilde
006E 0303 ; [.0F7B.0020.0002.00F1] # n-tilde
00D1 ; [.0F7B.0020.0008.00D1] # N-tilde
004E 0303 ; [.0F7B.0020.0008.00D1] # N-tilde
ENTRY
entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt)
00E6 ; [.0E33.0020.0002.00E6][.0E8B.0020.0002.00E6] # ae ligature as <a><e>
00C6 ; [.0E33.0020.0008.00C6][.0E8B.0020.0008.00C6] # AE ligature as <A><E>
ENTRY
B<NOTE:> The code point in the UCA file format (before C<';'>)
B<must> be a Unicode code point (defined as hexadecimal),
but not a native code point.
So C<0063> must always denote C<U+0063>,
but not a character of C<"\x63">.
Weighting may vary depending on collation element table.
So ensure the weights defined in C<entry> will be consistent with
those in the collation element table loaded via C<table>.
In DUCET v4.0.0, primary weight of C<C> is C<0E60>
and that of C<D> is C<0E6D>. So setting primary weight of C<CH> to C<0E6A>
(as a value between C<0E60> and C<0E6D>)
makes ordering as C<C E<lt> CH E<lt> D>.
Exactly speaking DUCET already has some characters between C<C> and C<D>:
C<small capital C> (C<U+1D04>) with primary weight C<0E64>,
C<c-hook/C-hook> (C<U+0188/U+0187>) with C<0E65>,
and C<c-curl> (C<U+0255>) with C<0E69>.
Then primary weight C<0E6A> for C<CH> makes C<CH>
ordered between C<c-curl> and C<D>.
=item hangul_terminator
-- see 7.1.4 Trailing Weights, UTS #10.
If a true value is given (non-zero but should be positive),
it will be added as a terminator primary weight to the end of
every standard Hangul syllable. Secondary and any higher weights
for terminator are set to zero.
If the value is false or C<hangul_terminator> key does not exist,
insertion of terminator weights will not be performed.
Boundaries of Hangul syllables are determined
according to conjoining Jamo behavior in F<the Unicode Standard>
and F<HangulSyllableType.txt>.
B<Implementation Note:>
(1) For expansion mapping (Unicode character mapped
to a sequence of collation elements), a terminator will not be added
between collation elements, even if Hangul syllable boundary exists there.
Addition of terminator is restricted to the next position
to the last collation element.
(2) Non-conjoining Hangul letters
(Compatibility Jamo, halfwidth Jamo, and enclosed letters) are not
automatically terminated with a terminator primary weight.
These characters may need terminator included in a collation element
table beforehand.
=item ignoreChar
=item ignoreName
-- see 3.2.2 Variable Weighting, UTS #10.
Makes the entry in the table completely ignorable;
i.e. as if the weights were zero at all levels.
Through C<ignoreChar>, any character matching C<qr/$ignoreChar/>
will be ignored. Through C<ignoreName>, any character whose name
(given in the C<table> file as a comment) matches C<qr/$ignoreName/>
will be ignored.
E.g. when 'a' and 'e' are ignorable,
'element' is equal to 'lament' (or 'lmnt').
=item katakana_before_hiragana
-- see 7.3.1 Tertiary Weight Table, UTS #10.
By default, hiragana is before katakana.
If the parameter is made true, this is reversed.
B<NOTE>: This parameter simplemindedly assumes that any hiragana/katakana
distinctions must occur in level 3, and their weights at level 3 must be
same as those mentioned in 7.3.1, UTS #10.
If you define your collation elements which violate this requirement,
this parameter does not work validly.
=item level
-- see 4.3 Form Sort Key, UTS #10.
Set the maximum level.
Any higher levels than the specified one are ignored.
Level 1: alphabetic ordering
Level 2: diacritic ordering
Level 3: case ordering
Level 4: tie-breaking (e.g. in the case when variable is 'shifted')
ex.level => 2,
If omitted, the maximum is the 4th.
=item normalization
-- see 4.1 Normalize, UTS #10.
If specified, strings are normalized before preparation of sort keys
(the normalization is executed after preprocess).
A form name C<Unicode::Normalize::normalize()> accepts will be applied
as C<$normalization_form>.
Acceptable names include C<'NFD'>, C<'NFC'>, C<'NFKD'>, and C<'NFKC'>.
See C<Unicode::Normalize::normalize()> for detail.
If omitted, C<'NFD'> is used.
C<normalization> is performed after C<preprocess> (if defined).
Furthermore, special values, C<undef> and C<"prenormalized">, can be used,
though they are not concerned with C<Unicode::Normalize::normalize()>.
If C<undef> (not a string C<"undef">) is passed explicitly
as the value for this key,
no normalization is carried out (this may make tailoring easier
if any normalization is not desired). Under C<(normalization =E<gt> undef)>,
only contiguous contractions are resolved;
e.g. even if C<A-ring> (and C<A-ring-cedilla>) is ordered after C<Z>,
C<A-cedilla-ring> would be primary equal to C<A>.
In this point,
C<(normalization =E<gt> undef, preprocess =E<gt> sub { NFD(shift) })>
B<is not> equivalent to C<(normalization =E<gt> 'NFD')>.
In the case of C<(normalization =E<gt> "prenormalized")>,
any normalization is not performed, but
non-contiguous contractions with combining characters are performed.
Therefore
C<(normalization =E<gt> 'prenormalized', preprocess =E<gt> sub { NFD(shift) })>
B<is> equivalent to C<(normalization =E<gt> 'NFD')>.
If source strings are finely prenormalized,
C<(normalization =E<gt> 'prenormalized')> may save time for normalization.
Except C<(normalization =E<gt> undef)>,
B<Unicode::Normalize> is required (see also B<CAVEAT>).
=item overrideCJK
-- see 7.1 Derived Collation Elements, UTS #10.
By default, CJK Unified Ideographs are ordered in Unicode codepoint order
but C<CJK Unified Ideographs> (if C<UCA_Version> is 8 to 11, its range is
C<U+4E00..U+9FA5>; if C<UCA_Version> is 14, its range is C<U+4E00..U+9FBB>)
are lesser than C<CJK Unified Ideographs Extension> (its range is
C<U+3400..U+4DB5> and C<U+20000..U+2A6D6>).
Through C<overrideCJK>, ordering of CJK Unified Ideographs can be overridden.
ex. CJK Unified Ideographs in the JIS code point order.
overrideCJK => sub {
my $u = shift; # get a Unicode codepoint
my $b = pack('n', $u); # to UTF-16BE
my $s = your_unicode_to_sjis_converter($b); # convert
my $n = unpack('n', $s); # convert sjis to short
[ $n, 0x20, 0x2, $u ]; # return the collation element
},
ex. ignores all CJK Unified Ideographs.
overrideCJK => sub {()}, # CODEREF returning empty list
# where ->eq("Pe\x{4E00}rl", "Perl") is true
# as U+4E00 is a CJK Unified Ideograph and to be ignorable.
If C<undef> is passed explicitly as the value for this key,
weights for CJK Unified Ideographs are treated as undefined.
But assignment of weight for CJK Unified Ideographs
in table or C<entry> is still valid.
=item overrideHangul
-- see 7.1 Derived Collation Elements, UTS #10.
By default, Hangul Syllables are decomposed into Hangul Jamo,
even if C<(normalization =E<gt> undef)>.
But the mapping of Hangul Syllables may be overridden.
This parameter works like C<overrideCJK>, so see there for examples.
If you want to override the mapping of Hangul Syllables,
NFD, NFKD, and FCD are not appropriate,
since they will decompose Hangul Syllables before overriding.
If C<undef> is passed explicitly as the value for this key,
weight for Hangul Syllables is treated as undefined
without decomposition into Hangul Jamo.
But definition of weight for Hangul Syllables
in table or C<entry> is still valid.
=item preprocess
-- see 5.1 Preprocessing, UTS #10.
If specified, the coderef is used to preprocess
before the formation of sort keys.
ex. dropping English articles, such as "a" or "the".
Then, "the pen" is before "a pencil".
preprocess => sub {
my $str = shift;
$str =~ s/\b(?:an?|the)\s+//gi;
return $str;
},
C<preprocess> is performed before C<normalization> (if defined).
=item rearrange
-- see 3.1.3 Rearrangement, UTS #10.
Characters that are not coded in logical order and to be rearranged.
If C<UCA_Version> is equal to or lesser than 11, default is:
rearrange => [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ],
If you want to disallow any rearrangement, pass C<undef> or C<[]>
(a reference to empty list) as the value for this key.
If C<UCA_Version> is equal to 14, default is C<[]> (i.e. no rearrangement).
B<According to the version 9 of UCA, this parameter shall not be used;
but it is not warned at present.>
=item table
-- see 3.2 Default Unicode Collation Element Table, UTS #10.
You can use another collation element table if desired.
The table file should be located in the F<Unicode/Collate> directory
on C<@INC>. Say, if the filename is F<Foo.txt>,
the table file is searched as F<Unicode/Collate/Foo.txt> in C<@INC>.
By default, F<allkeys.txt> (as the filename of DUCET) is used.
If you will prepare your own table file, any name other than F<allkeys.txt>
may be better to avoid namespace conflict.
If C<undef> is passed explicitly as the value for this key,
no file is read (but you can define collation elements via C<entry>).
A typical way to define a collation element table
without any file of table:
$onlyABC = Unicode::Collate->new(
table => undef,
entry => << 'ENTRIES',
0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
ENTRIES
);
If C<ignoreName> or C<undefName> is used, character names should be
specified as a comment (following C<#>) on each line.
=item undefChar
=item undefName
-- see 6.3.4 Reducing the Repertoire, UTS #10.
Undefines the collation element as if it were unassigned in the table.
This reduces the size of the table.
If an unassigned character appears in the string to be collated,
the sort key is made from its codepoint
as a single-character collation element,
as it is greater than any other assigned collation elements
(in the codepoint order among the unassigned characters).
But, it'd be better to ignore characters
unfamiliar to you and maybe never used.
Through C<undefChar>, any character matching C<qr/$undefChar/>
will be undefined. Through C<undefName>, any character whose name
(given in the C<table> file as a comment) matches C<qr/$undefName/>
will be undefined.
ex. Collation weights for beyond-BMP characters are not stored in object:
undefChar => qr/[^\0-\x{fffd}]/,
=item upper_before_lower
-- see 6.6 Case Comparisons, UTS #10.
By default, lowercase is before uppercase.
If the parameter is made true, this is reversed.
B<NOTE>: This parameter simplemindedly assumes that any lowercase/uppercase
distinctions must occur in level 3, and their weights at level 3 must be
same as those mentioned in 7.3.1, UTS #10.
If you define your collation elements that differ from this requirement,
this parameter doesn't work validly.
=item variable
-- see 3.2.2 Variable Weighting, UTS #10.
This key selects the variable weighting applied to variable collation elements,
which are marked with an ASTERISK in the table
(NOTE: Many punctuation marks and symbols are variable in F<allkeys.txt>).
variable => 'blanked', 'non-ignorable', 'shifted', or 'shift-trimmed'.
These names are case-insensitive.
By default (if specification is omitted), 'shifted' is adopted.
'Blanked' Variable elements are made ignorable at levels 1 through 3;
considered at the 4th level.
'Non-Ignorable' Variable elements are not reset to ignorable.
'Shifted' Variable elements are made ignorable at levels 1 through 3
their level 4 weight is replaced by the old level 1 weight.
Level 4 weight for Non-Variable elements is 0xFFFF.
'Shift-Trimmed' Same as 'shifted', but all FFFF's at the 4th level
are trimmed.
=back
=head2 Methods for Collation
=over 4
=item C<@sorted = $Collator-E<gt>sort(@not_sorted)>
Sorts a list of strings.
=item C<$result = $Collator-E<gt>cmp($a, $b)>
Returns 1 (when C<$a> is greater than C<$b>)
or 0 (when C<$a> is equal to C<$b>)
or -1 (when C<$a> is lesser than C<$b>).
=item C<$result = $Collator-E<gt>eq($a, $b)>
=item C<$result = $Collator-E<gt>ne($a, $b)>
=item C<$result = $Collator-E<gt>lt($a, $b)>
=item C<$result = $Collator-E<gt>le($a, $b)>
=item C<$result = $Collator-E<gt>gt($a, $b)>
=item C<$result = $Collator-E<gt>ge($a, $b)>
They work like the operators of the same names.
eq : whether $a is equal to $b.
ne : whether $a is not equal to $b.
lt : whether $a is lesser than $b.
le : whether $a is lesser than $b or equal to $b.
gt : whether $a is greater than $b.
ge : whether $a is greater than $b or equal to $b.
=item C<$sortKey = $Collator-E<gt>getSortKey($string)>
-- see 4.3 Form Sort Key, UTS #10.
Returns a sort key.
You compare the sort keys using a binary comparison
and get the result of the comparison of the strings using UCA.
$Collator->getSortKey($a) cmp $Collator->getSortKey($b)
is equivalent to
$Collator->cmp($a, $b)
=item C<$sortKeyForm = $Collator-E<gt>viewSortKey($string)>
Converts a sorting key into its representation form.
If C<UCA_Version> is 8, the output is slightly different.
use Unicode::Collate;
my $c = Unicode::Collate->new();
print $c->viewSortKey("Perl"),"\n";
# output:
# [0B67 0A65 0B7F 0B03 | 0020 0020 0020 0020 | 0008 0002 0002 0002 | FFFF FFFF FFFF FFFF]
# Level 1 Level 2 Level 3 Level 4
=back
=head2 Methods for Searching
B<DISCLAIMER:> If C<preprocess> or C<normalization> parameter is true
for C<$Collator>, calling these methods (C<index>, C<match>, C<gmatch>,
C<subst>, C<gsubst>) is croaked,
as the position and the length might differ
from those on the specified string.
(And C<rearrange> and C<hangul_terminator> parameters are neglected.)
The C<match>, C<gmatch>, C<subst>, C<gsubst> methods work
like C<m//>, C<m//g>, C<s///>, C<s///g>, respectively,
but they are not aware of any pattern, but only a literal substring.
=over 4
=item C<$position = $Collator-E<gt>index($string, $substring[, $position])>
=item C<($position, $length) = $Collator-E<gt>index($string, $substring[, $position])>
If C<$substring> matches a part of C<$string>, returns
the position of the first occurrence of the matching part in scalar context;
in list context, returns a two-element list of
the position and the length of the matching part.
If C<$substring> does not match any part of C<$string>,
returns C<-1> in scalar context and
an empty list in list context.
e.g. you say
my $Collator = Unicode::Collate->new( normalization => undef, level => 1 );
# (normalization => undef) is REQUIRED.
my $str = "Ich mu\x{DF} studieren Perl.";
my $sub = "M\x{DC}SS";
my $match;
if (my($pos,$len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
and get C<"muE<szlig>"> in C<$match> since C<"muE<szlig>">
is primary equal to C<"ME<Uuml>SS">.
=item C<$match_ref = $Collator-E<gt>match($string, $substring)>
=item C<($match) = $Collator-E<gt>match($string, $substring)>
If C<$substring> matches a part of C<$string>, in scalar context, returns
B<a reference to> the first occurrence of the matching part
(C<$match_ref> is always true if matches,
since every reference is B<true>);
in list context, returns the first occurrence of the matching part.
If C<$substring> does not match any part of C<$string>,
returns C<undef> in scalar context and
an empty list in list context.
e.g.
if ($match_ref = $Collator->match($str, $sub)) { # scalar context
print "matches [$$match_ref].\n";
} else {
print "doesn't match.\n";
}
or
if (($match) = $Collator->match($str, $sub)) { # list context
print "matches [$match].\n";
} else {
print "doesn't match.\n";
}
=item C<@match = $Collator-E<gt>gmatch($string, $substring)>
If C<$substring> matches a part of C<$string>, returns
all the matching parts (or matching count in scalar context).
If C<$substring> does not match any part of C<$string>,
returns an empty list.
=item C<$count = $Collator-E<gt>subst($string, $substring, $replacement)>
If C<$substring> matches a part of C<$string>,
the first occurrence of the matching part is replaced by C<$replacement>
(C<$string> is modified) and returns C<$count> (always equal to C<1>).
C<$replacement> can be a C<CODEREF>,
taking the matching part as an argument,
and returning a string to replace the matching part
(a bit similar to C<s/(..)/$coderef-E<gt>($1)/e>).
=item C<$count = $Collator-E<gt>gsubst($string, $substring, $replacement)>
If C<$substring> matches a part of C<$string>,
all the occurrences of the matching part are replaced by C<$replacement>
(C<$string> is modified) and returns C<$count>.
C<$replacement> can be a C<CODEREF>,
taking the matching part as an argument,
and returning a string to replace the matching part
(a bit similar to C<s/(..)/$coderef-E<gt>($1)/eg>).
e.g.
my $Collator = Unicode::Collate->new( normalization => undef, level => 1 );
# (normalization => undef) is REQUIRED.
my $str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
$Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
# now $str is "<b>Camel</b> donkey zebra <b>came\x{301}l</b> <b>CAMEL</b> horse <b>cAm\0E\0L</b>...";
# i.e., all the camels are made bold-faced.
=back
=head2 Other Methods
=over 4
=item C<%old_tailoring = $Collator-E<gt>change(%new_tailoring)>
Changes the values of the specified keys and returns the changed part.
$Collator = Unicode::Collate->new(level => 4);
$Collator->eq("perl", "PERL"); # false
%old = $Collator->change(level => 2); # returns (level => 4).
$Collator->eq("perl", "PERL"); # true
$Collator->change(%old); # returns (level => 2).
$Collator->eq("perl", "PERL"); # false
Not all C<(key,value)>s are allowed to be changed.
See also C<@Unicode::Collate::ChangeOK> and C<@Unicode::Collate::ChangeNG>.
In the scalar context, returns the modified collator
(but it is B<not> a clone from the original).
$Collator->change(level => 2)->eq("perl", "PERL"); # true
$Collator->eq("perl", "PERL"); # true; now max level is 2nd.
$Collator->change(level => 4)->eq("perl", "PERL"); # false
=item C<$version = $Collator-E<gt>version()>
Returns the version number (a string) of the Unicode Standard
which the C<table> file used by the collator object is based on.
If the table does not include a version line (starting with C<@version>),
returns C<"unknown">.
=item C<UCA_Version()>
Returns the tracking version number of UTS #10 this module consults.
=item C<Base_Unicode_Version()>
Returns the version number of UTS #10 this module consults.
=back
=head1 EXPORT
No method will be exported.
=head1 INSTALL
Though this module can be used without any C<table> file,
to use this module easily, it is recommended to install a table file
in the UCA format, by copying it under the directory
<a place in @INC>/Unicode/Collate.
The most preferable one is "The Default Unicode Collation Element Table"
(aka DUCET), available from the Unicode Consortium's website:
http://www.unicode.org/Public/UCA/
http://www.unicode.org/Public/UCA/latest/allkeys.txt (latest version)
If DUCET is not installed, it is recommended to copy the file
from http://www.unicode.org/Public/UCA/latest/allkeys.txt
to <a place in @INC>/Unicode/Collate/allkeys.txt
manually.
=head1 CAVEATS
=over 4
=item Normalization
Use of the C<normalization> parameter requires the B<Unicode::Normalize>
module (see L<Unicode::Normalize>).
If you do not need it (say, in the case when you do not need to
handle any combining characters),
assign C<normalization =E<gt> undef> explicitly.
-- see 6.5 Avoiding Normalization, UTS #10.
=item Conformance Test
The Conformance Test for the UCA is available
under L<http://www.unicode.org/Public/UCA/>.
For F<CollationTest_SHIFTED.txt>,
a collator via C<Unicode::Collate-E<gt>new( )> should be used;
for F<CollationTest_NON_IGNORABLE.txt>, a collator via
C<Unicode::Collate-E<gt>new(variable =E<gt> "non-ignorable", level =E<gt> 3)>.
B<Unicode::Normalize is required to try The Conformance Test.>
=back
=head1 AUTHOR, COPYRIGHT AND LICENSE
The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2005,
SADAHIRO Tomoyuki. Japan. All rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
The file Unicode/Collate/allkeys.txt was copied directly
from L<http://www.unicode.org/Public/UCA/4.1.0/allkeys.txt>.
This file is Copyright (c) 1991-2005 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in L<http://www.unicode.org/copyright.html>.
=head1 SEE ALSO
=over 4
=item Unicode Collation Algorithm - UTS #10
L<http://www.unicode.org/reports/tr10/>
=item The Default Unicode Collation Element Table (DUCET)
L<http://www.unicode.org/Public/UCA/latest/allkeys.txt>
=item The conformance test for the UCA
L<http://www.unicode.org/Public/UCA/latest/CollationTest.html>
L<http://www.unicode.org/Public/UCA/latest/CollationTest.zip>
=item Hangul Syllable Type
L<http://www.unicode.org/Public/UNIDATA/HangulSyllableType.txt>
=item Unicode Normalization Forms - UAX #15
L<http://www.unicode.org/reports/tr15/>
=back
=cut
package Unicode::UCD;
use strict;
use warnings;
our $VERSION = '0.27';
use Storable qw(dclone);
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(charinfo
charblock charscript
charblocks charscripts
charinrange
general_categories bidi_types
compexcl
casefold casespec
namedseq);
use Carp;
=head1 NAME
Unicode::UCD - Unicode character database
=head1 SYNOPSIS
use Unicode::UCD 'charinfo';
my $charinfo = charinfo($codepoint);
use Unicode::UCD 'casefold';
my $casefold = casefold(0xFB00);
use Unicode::UCD 'casespec';
my $casespec = casespec(0xFB00);
use Unicode::UCD 'charblock';
my $charblock = charblock($codepoint);
use Unicode::UCD 'charscript';
my $charscript = charscript($codepoint);
use Unicode::UCD 'charblocks';
my $charblocks = charblocks();
use Unicode::UCD 'charscripts';
my $charscripts = charscripts();
use Unicode::UCD qw(charscript charinrange);
my $range = charscript($script);
print "looks like $script\n" if charinrange($range, $codepoint);
use Unicode::UCD qw(general_categories bidi_types);
my $categories = general_categories();
my $types = bidi_types();
use Unicode::UCD 'compexcl';
my $compexcl = compexcl($codepoint);
use Unicode::UCD 'namedseq';
my $namedseq = namedseq($named_sequence_name);
my $unicode_version = Unicode::UCD::UnicodeVersion();
=head1 DESCRIPTION
The Unicode::UCD module offers a series of functions that
provide a simple interface to the Unicode
Character Database.
=head2 code point argument
Some of the functions are called with a I<code point argument>, which is either
a decimal or a hexadecimal scalar designating a Unicode code point, or C<U+>
followed by hexadecimals designating a Unicode code point. In other words, if
you want a code point to be interpreted as a hexadecimal number, you must
prefix it with either C<0x> or C<U+>, because a string like e.g. C<123> will be
interpreted as a decimal code point. Also note that Unicode is B<not> limited
to 16 bits (the number of Unicode code points is open-ended, in theory
unlimited): you may have more than 4 hexdigits.
=cut
# File-scoped lexicals, all left undefined at declaration.
# NOTE(review): the names suggest one cached filehandle per unicore data
# file, presumably passed by reference to openunicode(), which only opens
# a handle while the referenced variable is still undefined -- confirm
# against the callers.
my $UNICODEFH;
my $BLOCKSFH;
my $SCRIPTSFH;
my $VERSIONFH;
my $COMPEXCLFH;
my $CASEFOLDFH;
my $CASESPECFH;
my $NAMEDSEQFH;
sub openunicode {
    # Opens a file from a "unicore" directory found on @INC.
    #
    # Arguments:
    #   $rfh  - reference to the scalar that holds (or will hold) the
    #           filehandle; nothing is opened if it is already defined.
    #   @path - path components of the file, relative to "unicore".
    #
    # Returns the full name of the file that was opened, or undef when
    # the handle was already defined and no search was performed.
    # Croaks if the file cannot be opened under any @INC directory.
    my ($rfh, @path) = @_;
    my $f;
    unless (defined $$rfh) {
        use File::Spec;
        for my $d (@INC) {
            $f = File::Spec->catfile($d, "unicore", @path);
            # Three-argument open: the name is always treated as a plain
            # file to read.  The two-argument form would interpret mode
            # characters (">", "|", ...) in the path and strip leading
            # and trailing whitespace from it.
            last if open($$rfh, '<', $f);
            undef $f;
        }
        croak __PACKAGE__, ": failed to find ",
            File::Spec->catfile(@path), " in @INC"
            unless defined $f;
    }
    return $f;
}
=head2 B<charinfo()>
use Unicode::UCD 'charinfo';
my $charinfo = charinfo(0x41);
This returns information about the input L</code point argument>
as a reference to a hash of fields as defined by the Unicode
standard. If the L</code point argument> is not assigned in the standard
(i.e., has the general category C<Cn> meaning C<Unassigned>)
or is a non-character (meaning it is guaranteed to never be assigned in
the standard),
B<undef> is returned.
Fields that aren't applicable to the particular code point argument exist in the
returned hash, and are empty.
The keys in the hash with the meanings of their values are:
=over
=item B<code>
the input L</code point argument> expressed in hexadecimal, with leading zeros
added if necessary to make it contain at least four hexdigits
=item B<name>
name of I<code>, all IN UPPER CASE.
Some control-type code points do not have names.
This field will be empty for C<Surrogate> and C<Private Use> code points,
and for the others without a name,
it will contain a description enclosed in angle brackets, like
C<E<lt>controlE<gt>>.
=item B<category>
The short name of the general category of I<code>.
This will match one of the keys in the hash returned by L</general_categories()>.
=item B<combining>
the combining class number for I<code> used in the Canonical Ordering Algorithm.
For Unicode 5.1, this is described in Section 3.11 C<Canonical Ordering Behavior>
available at
L<http://www.unicode.org/versions/Unicode5.1.0/>
=item B<bidi>
bidirectional type of I<code>.
This will match one of the keys in the hash returned by L</bidi_types()>.
=item B<decomposition>
is empty if I<code> has no decomposition; or is one or more codes
(separated by spaces) that taken in order represent a decomposition for
I<code>. Each has at least four hexdigits.
The codes may be preceded by a word enclosed in angle brackets then a space,
like C<E<lt>compatE<gt> >, giving the type of decomposition
=item B<decimal>
if I<code> is a decimal digit this is its integer numeric value
=item B<digit>
if I<code> represents a whole number, this is its integer numeric value
=item B<numeric>
if I<code> represents a whole or rational number, this