2781 lines
96 KiB
Perl
2781 lines
96 KiB
Perl
#!./perl -wT
|
|
|
|
# This tests plain 'use locale' and adorned 'use locale ":not_characters"'
|
|
# Because these pragmas are compile time, and I (khw) am trying to test
|
|
# without using 'eval' as much as possible, which might cloud the issue, the
|
|
# crucial parts of the code are duplicated in a block for each pragma.
|
|
|
|
# Unfortunately, many systems have defective locale definitions. This test
|
|
# file looks for both perl bugs and bugs in the system's locale definitions.
|
|
# It can be difficult to tease apart which is which. For the latter, there
|
|
# are tests that are based on the POSIX standard. A character isn't supposed
|
|
# to be both a space and graphic, for example. Another example is if a
|
|
# character is the uppercase of another, that other should be the lowercase of
|
|
# the first. Including tests for these allows you to test for defective
|
|
# locales, as described in perllocale. The way this file distinguishes
|
|
# between defective locales, and perl bugs is to see what percentage of
|
|
# locales fail a given test. If it's a lot, then it's more likely to be a
|
|
# perl bug; only a few, those particular locales are likely defective. In
|
|
# that case the failing tests are marked TODO. (They should be reported to
|
|
# the vendor, however; but it's not perl's problem.) In some cases, this
|
|
# script has caused tickets to be filed against perl which turn out to be the
|
|
# platform's bug, but a higher percentage of locales are failing than the
|
|
# built-in cut-off point. For those platforms, code has been added to
|
|
# increase the cut-off, so those platforms don't trigger failing test reports.
|
|
# Ideally, the platforms would get fixed and that code would be changed to
|
|
# only kick-in when run on versions that are earlier than the fixed one. But,
|
|
# this rarely happens in practice.
|
|
|
|
# To make a TODO test, add the string 'TODO' to its %test_names value
|
|
|
|
my $is_ebcdic = ord("A") == 193;
|
|
my $os = lc $^O;
|
|
|
|
no warnings 'locale'; # We test even weird locales; and do some scary things
|
|
# in ok locales
|
|
|
|
binmode STDOUT, ':utf8';
|
|
binmode STDERR, ':utf8';
|
|
|
|
BEGIN {
|
|
chdir 't' if -d 't';
|
|
@INC = '../lib';
|
|
unshift @INC, '.';
|
|
require './loc_tools.pl';
|
|
unless (locales_enabled('LC_CTYPE')) {
|
|
print "1..0\n";
|
|
exit;
|
|
}
|
|
$| = 1;
|
|
require Config; import Config;
|
|
}
|
|
|
|
use strict;
|
|
use feature 'fc';
|
|
|
|
# =1 adds debugging output; =2 increases the verbosity somewhat
|
|
our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
|
|
|
|
# Certain tests have been shown to be problematical for a few locales. Don't
|
|
# fail them unless at least this percentage of the tested locales fail.
|
|
# On AIX machines, many locales call a no-break space a graphic.
|
|
# (There aren't 1000 locales currently in existence, so 99.9 works)
|
|
# EBCDIC os390 has more locales fail than normal, because it has locales that
|
|
# move various critical characters like '['.
|
|
my $acceptable_failure_percentage = ($os =~ / ^ ( aix ) $ /x)
|
|
? 99.9
|
|
: ($os =~ / ^ ( os390 ) $ /x)
|
|
? 10
|
|
: 5;
|
|
|
|
# The list of test numbers of the problematic tests.
|
|
my %problematical_tests;
|
|
|
|
# If any %problematical_tests fails in one of these locales, it is
|
|
# considered a TODO.
|
|
my %known_bad_locales = (
|
|
irix => qr/ ^ (?: cs | hu | sk ) $/x,
|
|
darwin => qr/ ^ lt_LT.ISO8859 /ix,
|
|
os390 => qr/ ^ italian /ix,
|
|
netbsd => qr/\bISO8859-2\b/i,
|
|
|
|
# This may be the same bug as the cygwin below; it's
|
|
# generating malformed UTF-8 on the radix being
|
|
# mulit-byte
|
|
solaris => qr/ ^ ( ar_ | pa_ ) /x,
|
|
);
|
|
|
|
# cygwin isn't returning proper radix length in this locale, but supposedly to
|
|
# be fixed in later versions.
|
|
if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) {
|
|
$known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix;
|
|
}
|
|
|
|
use Dumpvalue;
|
|
|
|
my $dumper = Dumpvalue->new(
|
|
tick => qq{"},
|
|
quoteHighBit => 0,
|
|
unctrl => "quote"
|
|
);
|
|
|
|
sub debug {
|
|
return unless $debug;
|
|
my($mess) = join "", '# ', @_;
|
|
chomp $mess;
|
|
print STDERR $dumper->stringify($mess,1), "\n";
|
|
}
|
|
|
|
sub note {
|
|
local $debug = 1;
|
|
debug @_;
|
|
}
|
|
|
|
sub debug_more {
|
|
return unless $debug > 1;
|
|
return debug(@_);
|
|
}
|
|
|
|
sub debugf {
|
|
printf STDERR @_ if $debug;
|
|
}
|
|
|
|
$a = 'abc %9';
|
|
|
|
my $test_num = 0;
|
|
|
|
sub ok {
|
|
my ($result, $message) = @_;
|
|
$message = "" unless defined $message;
|
|
|
|
print 'not ' unless ($result);
|
|
print "ok " . ++$test_num;
|
|
print " $message";
|
|
print "\n";
|
|
return ($result) ? 1 : 0;
|
|
}
|
|
|
|
sub skip {
|
|
return ok 1, "skipped: " . shift;
|
|
}
|
|
|
|
sub fail {
|
|
return ok 0, shift;
|
|
}
|
|
|
|
# First we'll do a lot of taint checking for locales.
|
|
# This is the easiest to test, actually, as any locale,
|
|
# even the default locale will taint under 'use locale'.
|
|
|
|
sub is_tainted { # hello, camel two.
|
|
no warnings 'uninitialized' ;
|
|
my $dummy;
|
|
local $@;
|
|
not eval { $dummy = join("", @_), kill 0; 1 }
|
|
}
|
|
|
|
sub check_taint ($;$) {
|
|
my $message_tail = $_[1] // "";
|
|
|
|
# Extra blanks are so aligns with taint_not output
|
|
$message_tail = ": $message_tail" if $message_tail;
|
|
ok is_tainted($_[0]), "verify that is tainted$message_tail";
|
|
}
|
|
|
|
sub check_taint_not ($;$) {
|
|
my $message_tail = $_[1] // "";
|
|
$message_tail = ": $message_tail" if $message_tail;
|
|
ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
|
|
}
|
|
|
|
foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
|
|
my $short_result = locales_enabled($category);
|
|
ok ($short_result == 0 || $short_result == 1,
|
|
"Verify locales_enabled('$category') returns 0 or 1");
|
|
debug("locales_enabled('$category') returned '$short_result'");
|
|
my $long_result = locales_enabled("LC_$category");
|
|
if (! ok ($long_result == $short_result,
|
|
" and locales_enabled('LC_$category') returns "
|
|
. "the same value")
|
|
) {
|
|
debug("locales_enabled('LC_$category') returned $long_result");
|
|
}
|
|
}
|
|
|
|
"\tb\t" =~ /^m?(\s)(.*)\1$/;
|
|
check_taint_not $&, "not tainted outside 'use locale'";
|
|
;
|
|
|
|
use locale; # engage locale and therefore locale taint.
|
|
|
|
# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
|
|
# ":notcharacters"
|
|
|
|
check_taint_not $a, '$a';
|
|
|
|
check_taint uc($a), 'uc($a)';
|
|
check_taint "\U$a", '"\U$a"';
|
|
check_taint ucfirst($a), 'ucfirst($a)';
|
|
check_taint "\u$a", '"\u$a"';
|
|
check_taint lc($a), 'lc($a)';
|
|
check_taint fc($a), 'fc($a)';
|
|
check_taint "\L$a", '"\L$a"';
|
|
check_taint "\F$a", '"\F$a"';
|
|
check_taint lcfirst($a), 'lcfirst($a)';
|
|
check_taint "\l$a", '"\l$a"';
|
|
|
|
check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
|
|
check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
|
|
check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
|
|
check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
|
|
check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
$_ = uc($a); # taint $_
|
|
|
|
check_taint $_, '$_ = uc($a)';
|
|
|
|
/(\w)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(\\w)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(\W)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(\\W)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(\s)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(\\s)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
/(\S)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(\\S)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
"0" =~ /(\d)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(\\d)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
/(\D)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(\\D)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
/([[:alnum:]])/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /([[:alnum:]])/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
/([[:^alnum:]])/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /([[:^alnum:]])/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(a)|(\\w)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
|
|
ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
|
|
check_taint_not $2, "\t\$2";
|
|
check_taint_not $3, "\t\$3";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence
|
|
check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /./";
|
|
|
|
"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale
|
|
check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(.)\\b(.)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint $2, "\t\$2";
|
|
check_taint_not $3, "\t\$3";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /./";
|
|
|
|
"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1.
|
|
check_taint $&, "\$& from /(.)\\B(.)/";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint $2, "\t\$2";
|
|
check_taint_not $3, "\t\$3";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /./";
|
|
|
|
"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
|
|
check_taint_not $&, "\$ & from /(.).(\\1)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
check_taint_not $3, "\t\$3";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$ & from /./";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
check_taint_not $_, 'untainting $_ works';
|
|
|
|
/(b)/; # this must not taint
|
|
check_taint_not $&, "\$ & from /(b)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
check_taint_not $_, 'untainting $_ works';
|
|
|
|
$b = uc($a); # taint $b
|
|
s/(.+)/$b/; # this must taint only the $_
|
|
|
|
check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
|
|
check_taint_not $&, "\t\$&";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(.+)/b/; # this must not taint
|
|
check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
|
|
check_taint_not $&, "\t\$&";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$b = $a; # untaint $b
|
|
|
|
($b = $a) =~ s/\w/$&/;
|
|
check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted.
|
|
check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not.
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(\w)/\l$1/; # this must taint
|
|
check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
|
|
check_taint $&, "\t\$&";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(\w)/\L$1/; # this must taint
|
|
check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
|
|
check_taint $&, "\t\$&";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(\w)/\u$1/; # this must taint
|
|
check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
|
|
check_taint $&, "\t\$&";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(\w)/\U$1/; # this must taint
|
|
check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
|
|
check_taint $&, "\t\$&";
|
|
check_taint $`, "\t\$`";
|
|
check_taint $', "\t\$'";
|
|
check_taint $+, "\t\$+";
|
|
check_taint $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
# After all this tainting $a should be cool.
|
|
|
|
check_taint_not $a, '$a still not tainted';
|
|
|
|
"a" =~ /([a-z])/;
|
|
check_taint_not $1, '"a" =~ /([a-z])/';
|
|
"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
|
|
check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
|
|
|
|
# BE SURE TO COPY ANYTHING YOU ADD to the block below
|
|
|
|
{ # This is just the previous tests copied here with a different
|
|
# compile-time pragma.
|
|
|
|
use locale ':not_characters'; # engage restricted locale with different
|
|
# tainting rules
|
|
check_taint_not $a, '$a';
|
|
|
|
check_taint_not uc($a), 'uc($a)';
|
|
check_taint_not "\U$a", '"\U$a"';
|
|
check_taint_not ucfirst($a), 'ucfirst($a)';
|
|
check_taint_not "\u$a", '"\u$a"';
|
|
check_taint_not lc($a), 'lc($a)';
|
|
check_taint_not fc($a), 'fc($a)';
|
|
check_taint_not "\L$a", '"\L$a"';
|
|
check_taint_not "\F$a", '"\F$a"';
|
|
check_taint_not lcfirst($a), 'lcfirst($a)';
|
|
check_taint_not "\l$a", '"\l$a"';
|
|
|
|
check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
|
|
check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
|
|
check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
|
|
check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
|
|
check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
$_ = uc($a);
|
|
|
|
check_taint_not $_, '$_ = uc($a)';
|
|
|
|
/(\w)/;
|
|
check_taint_not $&, "\$& from /(\\w)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(\W)/;
|
|
check_taint_not $&, "\$& from /(\\W)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(\s)/;
|
|
check_taint_not $&, "\$& from /(\\s)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
/(\S)/;
|
|
check_taint_not $&, "\$& from /(\\S)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
"0" =~ /(\d)/;
|
|
check_taint_not $&, "\$& from /(\\d)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
/(\D)/;
|
|
check_taint_not $&, "\$& from /(\\D)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
/([[:alnum:]])/;
|
|
check_taint_not $&, "\$& from /([[:alnum:]])/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
/([[:^alnum:]])/;
|
|
check_taint_not $&, "\$& from /([[:^alnum:]])/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
"a" =~ /(a)|(\w)/;
|
|
check_taint_not $&, "\$& from /(a)|(\\w)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
|
|
ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
|
|
check_taint_not $2, "\t\$2";
|
|
check_taint_not $3, "\t\$3";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
|
|
check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /./";
|
|
|
|
"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
|
|
check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /(.)/";
|
|
|
|
"a:" =~ /(.)\b(.)/;
|
|
check_taint_not $&, "\$& from /(.)\\b(.)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
check_taint_not $3, "\t\$3";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /./";
|
|
|
|
"aa" =~ /(.)\B(.)/;
|
|
check_taint_not $&, "\$& from /(.)\\B(.)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
check_taint_not $3, "\t\$3";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$& from /./";
|
|
|
|
"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
|
|
check_taint_not $&, "\$ & from /(.).(\\1)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
check_taint_not $3, "\t\$3";
|
|
|
|
/(.)/; # untaint $&, $`, $', $+, $1.
|
|
check_taint_not $&, "\$ & from /./";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
check_taint_not $_, 'untainting $_ works';
|
|
|
|
/(b)/;
|
|
check_taint_not $&, "\$ & from /(b)/";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
check_taint_not $_, 'untainting $_ works';
|
|
|
|
s/(.+)/b/;
|
|
check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
|
|
check_taint_not $&, "\t\$&";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$b = $a; # untaint $b
|
|
|
|
($b = $a) =~ s/\w/$&/;
|
|
check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/';
|
|
check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/';
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(\w)/\l$1/;
|
|
check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
|
|
check_taint_not $&, "\t\$&";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(\w)/\L$1/;
|
|
check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
|
|
check_taint_not $&, "\t\$&";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(\w)/\u$1/;
|
|
check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
|
|
check_taint_not $&, "\t\$&";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
$_ = $a; # untaint $_
|
|
|
|
s/(\w)/\U$1/;
|
|
check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
|
|
check_taint_not $&, "\t\$&";
|
|
check_taint_not $`, "\t\$`";
|
|
check_taint_not $', "\t\$'";
|
|
check_taint_not $+, "\t\$+";
|
|
check_taint_not $1, "\t\$1";
|
|
check_taint_not $2, "\t\$2";
|
|
|
|
# After all this tainting $a should be cool.
|
|
|
|
check_taint_not $a, '$a still not tainted';
|
|
|
|
"a" =~ /([a-z])/;
|
|
check_taint_not $1, '"a" =~ /([a-z])/';
|
|
"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
|
|
check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
|
|
|
|
}
|
|
|
|
# Here are in scope of 'use locale'
|
|
|
|
# I think we've seen quite enough of taint.
|
|
# Let us do some *real* locale work now,
|
|
# unless setlocale() is missing (i.e. minitest).
|
|
|
|
# The test number before our first setlocale()
|
|
my $final_without_setlocale = $test_num;
|
|
|
|
# Find locales.
|
|
|
|
debug "Scanning for locales...\n";
|
|
|
|
require POSIX; import POSIX ':locale_h';
|
|
|
|
my $categories = [ 'LC_CTYPE', 'LC_NUMERIC', 'LC_ALL' ];
|
|
debug "Scanning for just compatible";
|
|
my @Locale = find_locales($categories);
|
|
debug "Scanning for even incompatible";
|
|
my @include_incompatible_locales = find_locales($categories,
|
|
'even incompatible locales');
|
|
|
|
# The locales included in the incompatible list that aren't in the compatible
|
|
# one.
|
|
my @incompatible_locales;
|
|
|
|
if (@Locale < @include_incompatible_locales) {
|
|
my %seen;
|
|
@seen{@Locale} = ();
|
|
|
|
foreach my $item (@include_incompatible_locales) {
|
|
push @incompatible_locales, $item unless exists $seen{$item};
|
|
}
|
|
|
|
# For each bad locale, switch into it to find out why it's incompatible
|
|
for my $bad_locale (@incompatible_locales) {
|
|
my @warnings;
|
|
|
|
use warnings 'locale';
|
|
|
|
local $SIG{__WARN__} = sub {
|
|
my $warning = $_[0];
|
|
chomp $warning;
|
|
push @warnings, ($warning =~ s/\n/\n# /sgr);
|
|
};
|
|
|
|
debug "Trying incompatible $bad_locale";
|
|
my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale);
|
|
|
|
my $message = "testing of locale '$bad_locale' is skipped";
|
|
if (@warnings) {
|
|
skip $message . ":\n# " . join "\n# ", @warnings;
|
|
}
|
|
elsif (! $ret) {
|
|
skip("$message:\n#"
|
|
. " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed");
|
|
}
|
|
else {
|
|
fail $message . ", because it is was found to be incompatible with"
|
|
. " Perl, but could not discern reason";
|
|
}
|
|
}
|
|
}
|
|
|
|
debug "Locales =\n";
|
|
for ( @Locale ) {
|
|
debug "$_\n";
|
|
}
|
|
|
|
unless (@Locale) {
|
|
print "1..$test_num\n";
|
|
exit;
|
|
}
|
|
|
|
|
|
setlocale(&POSIX::LC_ALL, "C");
|
|
|
|
my %posixes;
|
|
|
|
my %Problem;
|
|
my %Okay;
|
|
my %Known_bad_locale; # Failed test for a locale known to be bad
|
|
my %Testing;
|
|
my @Added_alpha; # Alphas that aren't in the C locale.
|
|
my %test_names;
|
|
|
|
sub disp_chars {
|
|
# This returns a display string denoting the input parameter @_, each
|
|
# entry of which is a single character in the range 0-255. The first part
|
|
# of the output is a string of the characters in @_ that are ASCII
|
|
# graphics, and hence unambiguously displayable. They are given by code
|
|
# point order. The second part is the remaining code points, the ordinals
|
|
# of which are each displayed as 2-digit hex. Blanks are inserted so as
|
|
# to keep anything from the first part looking like a 2-digit hex number.
|
|
|
|
no locale;
|
|
my @chars = sort { ord $a <=> ord $b } @_;
|
|
my $output = "";
|
|
my $range_start;
|
|
my $start_class;
|
|
push @chars, chr(258); # This sentinel simplifies the loop termination
|
|
# logic
|
|
foreach my $i (0 .. @chars - 1) {
|
|
my $char = $chars[$i];
|
|
my $range_end;
|
|
my $class;
|
|
|
|
# We avoid using [:posix:] classes, as these are being tested in this
|
|
# file. Each equivalence class below is for things that can appear in
|
|
# a range; those that can't be in a range have class -1. 0 for those
|
|
# which should be output in hex; and >0 for the other ranges
|
|
if ($char =~ /[A-Z]/) {
|
|
$class = 2;
|
|
}
|
|
elsif ($char =~ /[a-z]/) {
|
|
$class = 3;
|
|
}
|
|
elsif ($char =~ /[0-9]/) {
|
|
$class = 4;
|
|
}
|
|
# Uncomment to get literal punctuation displayed instead of hex
|
|
#elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
|
|
# $class = -1; # Punct never appears in a range
|
|
#}
|
|
else {
|
|
$class = 0; # Output in hex
|
|
}
|
|
|
|
if (! defined $range_start) {
|
|
if ($class < 0) {
|
|
$output .= " " . $char;
|
|
}
|
|
else {
|
|
$range_start = ord $char;
|
|
$start_class = $class;
|
|
}
|
|
} # A range ends if not consecutive, or the class-type changes
|
|
elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
|
|
|| $class != $start_class)
|
|
{
|
|
|
|
# Here, the current character is not in the range. This means the
|
|
# previous character must have been. Output the range up through
|
|
# that one.
|
|
my $range_length = $range_end - $range_start + 1;
|
|
if ($start_class > 0) {
|
|
$output .= " " . chr($range_start);
|
|
$output .= "-" . chr($range_end) if $range_length > 1;
|
|
}
|
|
else {
|
|
$output .= sprintf(" %02X", $range_start);
|
|
$output .= sprintf("-%02X", $range_end) if $range_length > 1;
|
|
}
|
|
|
|
# Handle the new current character, as potentially beginning a new
|
|
# range
|
|
undef $range_start;
|
|
redo;
|
|
}
|
|
}
|
|
|
|
$output =~ s/^ //;
|
|
return $output;
|
|
}
|
|
|
|
sub disp_str ($) {
|
|
my $string = shift;
|
|
|
|
# Displays the string unambiguously. ASCII printables are always output
|
|
# as-is, though perhaps separated by blanks from other characters. If
|
|
# entirely printable ASCII, just returns the string. Otherwise if valid
|
|
# UTF-8 it uses the character names for non-printable-ASCII. Otherwise it
|
|
# outputs hex for each non-ASCII-printable byte.
|
|
|
|
return $string if $string =~ / ^ [[:print:]]* $/xa;
|
|
|
|
my $result = "";
|
|
my $prev_was_punct = 1; # Beginning is considered punct
|
|
if (utf8::valid($string) && utf8::is_utf8($string)) {
|
|
use charnames ();
|
|
foreach my $char (split "", $string) {
|
|
|
|
# Keep punctuation adjacent to other characters; otherwise
|
|
# separate them with a blank
|
|
if ($char =~ /[[:punct:]]/a) {
|
|
$result .= $char;
|
|
$prev_was_punct = 1;
|
|
}
|
|
elsif ($char =~ /[[:print:]]/a) {
|
|
$result .= " " unless $prev_was_punct;
|
|
$result .= $char;
|
|
$prev_was_punct = 0;
|
|
}
|
|
else {
|
|
$result .= " " unless $prev_was_punct;
|
|
my $name = charnames::viacode(ord $char);
|
|
$result .= (defined $name) ? $name : ':unknown:';
|
|
$prev_was_punct = 0;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
use bytes;
|
|
foreach my $char (split "", $string) {
|
|
if ($char =~ /[[:punct:]]/a) {
|
|
$result .= $char;
|
|
$prev_was_punct = 1;
|
|
}
|
|
elsif ($char =~ /[[:print:]]/a) {
|
|
$result .= " " unless $prev_was_punct;
|
|
$result .= $char;
|
|
$prev_was_punct = 0;
|
|
}
|
|
else {
|
|
$result .= " " unless $prev_was_punct;
|
|
$result .= sprintf("%02X", ord $char);
|
|
$prev_was_punct = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return $result;
|
|
}
|
|
|
|
sub report_result {
|
|
my ($Locale, $i, $pass_fail, $message) = @_;
|
|
if ($pass_fail) {
|
|
push @{$Okay{$i}}, $Locale;
|
|
}
|
|
else {
|
|
$message //= "";
|
|
$message = " ($message)" if $message;
|
|
$Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os}
|
|
&& $Locale =~ $known_bad_locales{$os};
|
|
$Problem{$i}{$Locale} = 1;
|
|
debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
|
|
}
|
|
}
|
|
|
|
sub report_multi_result {
|
|
my ($Locale, $i, $results_ref) = @_;
|
|
|
|
# $results_ref points to an array, each element of which is a character that was
|
|
# in error for this test numbered '$i'. If empty, the test passed
|
|
|
|
my $message = "";
|
|
if (@$results_ref) {
|
|
$message = join " ", "for", disp_chars(@$results_ref);
|
|
}
|
|
report_result($Locale, $i, @$results_ref == 0, $message);
|
|
}
|
|
|
|
my $first_locales_test_number = $final_without_setlocale
|
|
+ 1 + @incompatible_locales;
|
|
my $locales_test_number;
|
|
my $not_necessarily_a_problem_test_number;
|
|
my $first_casing_test_number;
|
|
my %setlocale_failed; # List of locales that setlocale() didn't work on
|
|
|
|
foreach my $Locale (@Locale) {
|
|
$locales_test_number = $first_locales_test_number - 1;
|
|
debug "\n";
|
|
debug "Locale = $Locale\n";
|
|
|
|
unless (setlocale(&POSIX::LC_ALL, $Locale)) {
|
|
$setlocale_failed{$Locale} = $Locale;
|
|
next;
|
|
}
|
|
|
|
# We test UTF-8 locales only under ':not_characters'; It is easier to
|
|
# test them in other test files than here. Non- UTF-8 locales are tested
|
|
# only under plain 'use locale', as otherwise we would have to convert
|
|
# everything in them to Unicode.
|
|
|
|
my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
|
|
my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
|
|
my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
|
|
|
|
my $is_utf8_locale = is_locale_utf8($Locale);
|
|
|
|
debug "is utf8 locale? = $is_utf8_locale\n";
|
|
|
|
debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n";
|
|
|
|
if (! $is_utf8_locale) {
|
|
use locale;
|
|
@{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
|
|
@{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
|
|
@{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
|
|
@{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
|
|
@{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
|
|
@{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
|
|
@{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
|
|
@{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
|
|
@{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
|
|
@{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
|
|
@{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
|
|
@{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
|
|
@{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
|
|
@{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
|
|
@{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
|
|
|
|
# Sieve the uppercase and the lowercase.
|
|
|
|
for (@{$posixes{'word'}}) {
|
|
if (/[^\d_]/) { # skip digits and the _
|
|
if (uc($_) eq $_) {
|
|
$UPPER{$_} = $_;
|
|
}
|
|
if (lc($_) eq $_) {
|
|
$lower{$_} = $_;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
use locale ':not_characters';
|
|
@{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
|
|
@{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
|
|
@{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
|
|
@{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
|
|
@{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
|
|
@{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
|
|
@{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
|
|
@{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
|
|
@{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
|
|
@{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
|
|
@{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
|
|
@{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
|
|
@{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
|
|
@{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
|
|
@{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
|
|
for (@{$posixes{'word'}}) {
|
|
if (/[^\d_]/) { # skip digits and the _
|
|
if (uc($_) eq $_) {
|
|
$UPPER{$_} = $_;
|
|
}
|
|
if (lc($_) eq $_) {
|
|
$lower{$_} = $_;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
# Ordered, where possible, in groups of "this is a subset of the next
|
|
# one"
|
|
debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
|
|
debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
|
|
debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
|
|
debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
|
|
debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
|
|
debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n";
|
|
debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
|
|
debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n";
|
|
debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n";
|
|
debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
|
|
debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
|
|
debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n";
|
|
debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
|
|
debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
|
|
debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
|
|
|
|
foreach (keys %UPPER) {
|
|
|
|
$BoThCaSe{$_}++ if exists $lower{$_};
|
|
}
|
|
foreach (keys %lower) {
|
|
$BoThCaSe{$_}++ if exists $UPPER{$_};
|
|
}
|
|
foreach (keys %BoThCaSe) {
|
|
delete $UPPER{$_};
|
|
delete $lower{$_};
|
|
}
|
|
|
|
my %Unassigned;
|
|
foreach my $ord ( 0 .. 255 ) {
|
|
$Unassigned{chr $ord} = 1;
|
|
}
|
|
foreach my $class (keys %posixes) {
|
|
foreach my $char (@{$posixes{$class}}) {
|
|
delete $Unassigned{$char};
|
|
}
|
|
}
|
|
|
|
debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
|
|
debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
|
|
debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
|
|
debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
|
|
|
|
my @failures;
|
|
my @fold_failures;
|
|
foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
|
|
my $ok;
|
|
my $fold_ok;
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
$ok = $x =~ /[[:upper:]]/;
|
|
$fold_ok = $x =~ /[[:lower:]]/i;
|
|
}
|
|
else {
|
|
use locale;
|
|
$ok = $x =~ /[[:upper:]]/;
|
|
$fold_ok = $x =~ /[[:lower:]]/i;
|
|
}
|
|
push @failures, $x unless $ok;
|
|
push @fold_failures, $x unless $fold_ok;
|
|
}
|
|
$locales_test_number++;
|
|
$first_casing_test_number = $locales_test_number;
|
|
$test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
|
|
report_multi_result($Locale, $locales_test_number, \@failures);
|
|
|
|
$locales_test_number++;
|
|
|
|
$test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
|
|
report_multi_result($Locale, $locales_test_number, \@fold_failures);
|
|
|
|
undef @failures;
|
|
undef @fold_failures;
|
|
|
|
foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
|
|
my $ok;
|
|
my $fold_ok;
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
$ok = $x =~ /[[:lower:]]/;
|
|
$fold_ok = $x =~ /[[:upper:]]/i;
|
|
}
|
|
else {
|
|
use locale;
|
|
$ok = $x =~ /[[:lower:]]/;
|
|
$fold_ok = $x =~ /[[:upper:]]/i;
|
|
}
|
|
push @failures, $x unless $ok;
|
|
push @fold_failures, $x unless $fold_ok;
|
|
}
|
|
|
|
$locales_test_number++;
|
|
$test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
|
|
report_multi_result($Locale, $locales_test_number, \@failures);
|
|
|
|
$locales_test_number++;
|
|
$test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
|
|
report_multi_result($Locale, $locales_test_number, \@fold_failures);
|
|
|
|
{ # Find the alphabetic characters that are not considered alphabetics
|
|
# in the default (C) locale.
|
|
|
|
no locale;
|
|
|
|
@Added_alpha = ();
|
|
for (keys %UPPER, keys %lower, keys %BoThCaSe) {
|
|
push(@Added_alpha, $_) if (/\W/);
|
|
}
|
|
}
|
|
|
|
@Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
|
|
|
|
debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
|
|
|
|
# Cross-check the whole 8-bit character set.
|
|
|
|
++$locales_test_number;
|
|
my @f;
|
|
$test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:word:]]/ == /\w/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:word:]]/ == /\w/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:digit:]]/ == /\d/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:digit:]]/ == /\d/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:space:]]/ == /\s/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:space:]]/ == /\s/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
|
|
(/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
|
|
(/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
|
|
(/[[:blank:]]/ xor /[[:^blank:]]/) ||
|
|
(/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
|
|
(/[[:digit:]]/ xor /[[:^digit:]]/) ||
|
|
(/[[:graph:]]/ xor /[[:^graph:]]/) ||
|
|
(/[[:lower:]]/ xor /[[:^lower:]]/) ||
|
|
(/[[:print:]]/ xor /[[:^print:]]/) ||
|
|
(/[[:space:]]/ xor /[[:^space:]]/) ||
|
|
(/[[:upper:]]/ xor /[[:^upper:]]/) ||
|
|
(/[[:word:]]/ xor /[[:^word:]]/) ||
|
|
(/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
|
|
|
|
# effectively is what [:cased:] would be if it existed.
|
|
(/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
|
|
}
|
|
else {
|
|
push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
|
|
(/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
|
|
(/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
|
|
(/[[:blank:]]/ xor /[[:^blank:]]/) ||
|
|
(/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
|
|
(/[[:digit:]]/ xor /[[:^digit:]]/) ||
|
|
(/[[:graph:]]/ xor /[[:^graph:]]/) ||
|
|
(/[[:lower:]]/ xor /[[:^lower:]]/) ||
|
|
(/[[:print:]]/ xor /[[:^print:]]/) ||
|
|
(/[[:space:]]/ xor /[[:^space:]]/) ||
|
|
(/[[:upper:]]/ xor /[[:^upper:]]/) ||
|
|
(/[[:word:]]/ xor /[[:^word:]]/) ||
|
|
(/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
|
|
(/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
# The rules for the relationships are given in:
|
|
# http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
|
|
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
|
|
for ('a' .. 'z') {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:lower:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:lower:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
|
|
for ('A' .. 'Z') {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:upper:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:upper:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
|
|
for ('0' .. '9') {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:digit:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:digit:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
|
|
report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
|
|
if (@{$posixes{'digit'}} == 20) {
|
|
my $previous_ord;
|
|
for (map { chr } 0..255) {
|
|
next unless /[[:digit:]]/;
|
|
next if /[0-9]/;
|
|
if (defined $previous_ord) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if ord $_ != $previous_ord + 1;
|
|
}
|
|
else {
|
|
push @f, $_ if ord $_ != $previous_ord + 1;
|
|
}
|
|
}
|
|
$previous_ord = ord $_;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
my @xdigit_digits; # :digit: & :xdigit:
|
|
$test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
# For utf8 locales, we actually use a stricter test: that :digit:
|
|
# is a subset of :xdigit:, as we know that only 0-9 should match
|
|
push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
|
|
}
|
|
else {
|
|
push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
|
|
}
|
|
}
|
|
if (! $is_utf8_locale) {
|
|
|
|
# For non-utf8 locales, @xdigit_digits is a list of the characters
|
|
# that are both :xdigit: and :digit:. Because :digit: is stored in
|
|
# increasing code point order (unless the tests above failed),
|
|
# @xdigit_digits is as well. There should be exactly 10 or
|
|
# 20 of these.
|
|
if (@xdigit_digits != 10 && @xdigit_digits != 20) {
|
|
@f = @xdigit_digits;
|
|
}
|
|
else {
|
|
|
|
# Look for contiguity in the series, adding any wrong ones to @f
|
|
my @temp = @xdigit_digits;
|
|
while (@temp > 1) {
|
|
push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
|
|
|
|
# Skip this test for the 0th character of
|
|
# the second block of 10, as it won't be
|
|
# contiguous with the previous block
|
|
&& (! defined $xdigit_digits[10]
|
|
|| $temp[1] != $xdigit_digits[10]);
|
|
shift @temp;
|
|
}
|
|
}
|
|
}
|
|
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
|
|
for ('A' .. 'F', 'a' .. 'f') {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:xdigit:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:xdigit:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
|
|
my $previous_ord;
|
|
my $count = 0;
|
|
for my $chr (map { chr } 0..255) {
|
|
next unless $chr =~ /[[:xdigit:]]/;
|
|
if ($is_utf8_locale) {
|
|
next if $chr =~ /[[:digit:]]/;
|
|
}
|
|
else {
|
|
next if grep { $chr eq $_ } @xdigit_digits;
|
|
}
|
|
next if $chr =~ /[A-Fa-f]/;
|
|
if (defined $previous_ord) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $chr if ord $chr != $previous_ord + 1;
|
|
}
|
|
else {
|
|
push @f, $chr if ord $chr != $previous_ord + 1;
|
|
}
|
|
}
|
|
$count++;
|
|
if ($count == 6) {
|
|
undef $previous_ord;
|
|
}
|
|
else {
|
|
$previous_ord = ord $chr;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
# Note that xdigit doesn't have to be a subset of alnum
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, " " if " " =~ /[[:graph:]]/;
|
|
}
|
|
else {
|
|
push @f, " " if " " =~ /[[:graph:]]/;
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
|
|
for (' ', "\f", "\n", "\r", "\t", "\cK") {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:space:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:space:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
|
|
for (' ', "\t") {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ unless /[[:blank:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ unless /[[:blank:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, " " if " " !~ /[[:print:]]/;
|
|
}
|
|
else {
|
|
push @f, " " if " " !~ /[[:print:]]/;
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
|
|
}
|
|
else {
|
|
push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
|
|
}
|
|
else {
|
|
push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
|
|
}
|
|
else {
|
|
push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
++$locales_test_number;
|
|
undef @f;
|
|
$test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
|
|
for (map { chr } 0..255) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
|
|
}
|
|
else {
|
|
push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
|
|
foreach ($first_casing_test_number..$locales_test_number) {
|
|
$problematical_tests{$_} = 1;
|
|
}
|
|
|
|
|
|
# Test for read-only scalars' locale vs non-locale comparisons.
|
|
|
|
{
|
|
no locale;
|
|
my $ok;
|
|
$a = "qwerty";
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
$ok = ($a cmp "qwerty") == 0;
|
|
}
|
|
else {
|
|
use locale;
|
|
$ok = ($a cmp "qwerty") == 0;
|
|
}
|
|
report_result($Locale, ++$locales_test_number, $ok);
|
|
$test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
|
|
}
|
|
|
|
{
|
|
my ($from, $to, $lesser, $greater,
|
|
@test, %test, $test, $yes, $no, $sign);
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
|
|
$not_necessarily_a_problem_test_number = $locales_test_number;
|
|
for (0..9) {
|
|
# Select a slice.
|
|
$from = int(($_*@{$posixes{'word'}})/10);
|
|
$to = $from + int(@{$posixes{'word'}}/10);
|
|
$to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
|
|
$lesser = join('', @{$posixes{'word'}}[$from..$to]);
|
|
# Select a slice one character on.
|
|
$from++; $to++;
|
|
$to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
|
|
$greater = join('', @{$posixes{'word'}}[$from..$to]);
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
($yes, $no, $sign) = ($lesser lt $greater
|
|
? (" ", "not ", 1)
|
|
: ("not ", " ", -1));
|
|
}
|
|
else {
|
|
use locale;
|
|
($yes, $no, $sign) = ($lesser lt $greater
|
|
? (" ", "not ", 1)
|
|
: ("not ", " ", -1));
|
|
}
|
|
# all these tests should FAIL (return 0). Exact lt or gt cannot
|
|
# be tested because in some locales, say, eacute and E may test
|
|
# equal.
|
|
@test =
|
|
(
|
|
$no.' ($lesser le $greater)', # 1
|
|
'not ($lesser ne $greater)', # 2
|
|
' ($lesser eq $greater)', # 3
|
|
$yes.' ($lesser ge $greater)', # 4
|
|
$yes.' ($lesser ge $greater)', # 5
|
|
$yes.' ($greater le $lesser )', # 7
|
|
'not ($greater ne $lesser )', # 8
|
|
' ($greater eq $lesser )', # 9
|
|
$no.' ($greater ge $lesser )', # 10
|
|
'not (($lesser cmp $greater) == -($sign))' # 11
|
|
);
|
|
@test{@test} = 0 x @test;
|
|
$test = 0;
|
|
for my $ti (@test) {
|
|
if ($is_utf8_locale) {
|
|
use locale ':not_characters';
|
|
$test{$ti} = eval $ti;
|
|
}
|
|
else {
|
|
# Already in 'use locale';
|
|
$test{$ti} = eval $ti;
|
|
}
|
|
$test ||= $test{$ti}
|
|
}
|
|
report_result($Locale, $locales_test_number, $test == 0);
|
|
if ($test) {
|
|
debug "lesser = '$lesser'\n";
|
|
debug "greater = '$greater'\n";
|
|
debug "lesser cmp greater = ",
|
|
$lesser cmp $greater, "\n";
|
|
debug "greater cmp lesser = ",
|
|
$greater cmp $lesser, "\n";
|
|
debug "(greater) from = $from, to = $to\n";
|
|
for my $ti (@test) {
|
|
debugf("# %-40s %-4s", $ti,
|
|
$test{$ti} ? 'FAIL' : 'ok');
|
|
if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
|
|
debugf("(%s == %4d)", $1, eval $1);
|
|
}
|
|
debugf("\n#");
|
|
}
|
|
|
|
last;
|
|
}
|
|
}
|
|
|
|
use locale;
|
|
|
|
my @sorted_controls;
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number}
|
|
= 'Skip in locales where there are no controls;'
|
|
. ' otherwise verify that \0 sorts before any (other) control';
|
|
if (! $posixes{'cntrl'}) {
|
|
report_result($Locale, $locales_test_number, 1);
|
|
|
|
# We use all code points for the tests below since there aren't
|
|
# any controls
|
|
push @sorted_controls, chr $_ for 1..255;
|
|
@sorted_controls = sort @sorted_controls;
|
|
}
|
|
else {
|
|
@sorted_controls = @{$posixes{'cntrl'}};
|
|
push @sorted_controls, "\0",
|
|
unless grep { $_ eq "\0" } @sorted_controls;
|
|
@sorted_controls = sort @sorted_controls;
|
|
my $output = "";
|
|
for my $control (@sorted_controls) {
|
|
$output .= " " . disp_chars($control);
|
|
}
|
|
debug "sorted :cntrl: (plus NUL) = $output\n";
|
|
my $ok = $sorted_controls[0] eq "\0";
|
|
report_result($Locale, $locales_test_number, $ok);
|
|
|
|
shift @sorted_controls if $ok;
|
|
}
|
|
|
|
my $lowest_control = $sorted_controls[0];
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number}
|
|
= 'Skip in locales where all controls have primary sorting weight; '
|
|
. 'otherwise verify that \0 doesn\'t have primary sorting weight';
|
|
if ("a${lowest_control}c" lt "ab") {
|
|
report_result($Locale, $locales_test_number, 1);
|
|
}
|
|
else {
|
|
my $ok = "ab" lt "a\0c";
|
|
report_result($Locale, $locales_test_number, $ok);
|
|
}
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number}
|
|
= 'Verify that strings with embedded NUL collate';
|
|
my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a";
|
|
report_result($Locale, $locales_test_number, $ok);
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number}
|
|
= 'Verify that strings with embedded NUL and '
|
|
. 'extra trailing NUL collate';
|
|
$ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}";
|
|
report_result($Locale, $locales_test_number, $ok);
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number}
|
|
= 'Verify that empty strings collate';
|
|
$ok = "" le "";
|
|
report_result($Locale, $locales_test_number, $ok);
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number}
|
|
= "Skip in non-UTF-8 locales; otherwise verify that UTF8ness "
|
|
. "doesn't matter with collation";
|
|
if (! $is_utf8_locale) {
|
|
report_result($Locale, $locales_test_number, 1);
|
|
}
|
|
else {
|
|
|
|
# khw can't think of anything better. Start with a string that is
|
|
# higher than its UTF-8 representation in both EBCDIC and ASCII
|
|
my $string = chr utf8::unicode_to_native(0xff);
|
|
my $utf8_string = $string;
|
|
utf8::upgrade($utf8_string);
|
|
|
|
# 8 should be lt 9 in all locales (except ones that aren't
|
|
# ASCII-based, which might fail this)
|
|
$ok = ("a${string}8") lt ("a${utf8_string}9");
|
|
report_result($Locale, $locales_test_number, $ok);
|
|
}
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number}
|
|
= "Skip in UTF-8 locales; otherwise verify that single byte "
|
|
. "collates before 0x100 and above";
|
|
if ($is_utf8_locale) {
|
|
report_result($Locale, $locales_test_number, 1);
|
|
}
|
|
else {
|
|
my $max_collating = chr 0; # Find byte that collates highest
|
|
for my $i (0 .. 255) {
|
|
my $char = chr $i;
|
|
$max_collating = $char if $char gt $max_collating;
|
|
}
|
|
$ok = $max_collating lt chr 0x100;
|
|
report_result($Locale, $locales_test_number, $ok);
|
|
}
|
|
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number}
|
|
= "Skip in UTF-8 locales; otherwise verify that 0x100 and "
|
|
. "above collate in code point order";
|
|
if ($is_utf8_locale) {
|
|
report_result($Locale, $locales_test_number, 1);
|
|
}
|
|
else {
|
|
$ok = chr 0x100 lt chr 0x101;
|
|
report_result($Locale, $locales_test_number, $ok);
|
|
}
|
|
}
|
|
|
|
my $ok1;
|
|
my $ok2;
|
|
my $ok3;
|
|
my $ok4;
|
|
my $ok5;
|
|
my $ok6;
|
|
my $ok7;
|
|
my $ok8;
|
|
my $ok9;
|
|
my $ok10;
|
|
my $ok11;
|
|
my $ok12;
|
|
my $ok13;
|
|
my $ok14;
|
|
my $ok14_5;
|
|
my $ok15;
|
|
my $ok16;
|
|
my $ok17;
|
|
my $ok18;
|
|
my $ok19;
|
|
my $ok20;
|
|
my $ok21;
|
|
|
|
my $c;
|
|
my $d;
|
|
my $e;
|
|
my $f;
|
|
my $g;
|
|
my $h;
|
|
my $i;
|
|
my $j;
|
|
|
|
if (! $is_utf8_locale) {
|
|
use locale;
|
|
|
|
my ($x, $y) = (1.23, 1.23);
|
|
|
|
$a = "$x";
|
|
printf ''; # printf used to reset locale to "C"
|
|
$b = "$y";
|
|
$ok1 = $a eq $b;
|
|
|
|
$c = "$x";
|
|
my $z = sprintf ''; # sprintf used to reset locale to "C"
|
|
$d = "$y";
|
|
$ok2 = $c eq $d;
|
|
{
|
|
|
|
use warnings;
|
|
my $w = 0;
|
|
local $SIG{__WARN__} =
|
|
sub {
|
|
print "# @_\n";
|
|
$w++;
|
|
};
|
|
|
|
# The == (among other ops) used to warn for locales
|
|
# that had something else than "." as the radix character.
|
|
|
|
$ok3 = $c == 1.23;
|
|
$ok4 = $c == $x;
|
|
$ok5 = $c == $d;
|
|
{
|
|
no locale;
|
|
|
|
$e = "$x";
|
|
|
|
$ok6 = $e == 1.23;
|
|
$ok7 = $e == $x;
|
|
$ok8 = $e == $c;
|
|
}
|
|
|
|
$f = "1.23";
|
|
$g = 2.34;
|
|
$h = 1.5;
|
|
$i = 1.25;
|
|
$j = "$h:$i";
|
|
|
|
$ok9 = $f == 1.23;
|
|
$ok10 = $f == $x;
|
|
$ok11 = $f == $c;
|
|
$ok12 = abs(($f + $g) - 3.57) < 0.01;
|
|
$ok13 = $w == 0;
|
|
$ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
|
|
}
|
|
{
|
|
no locale;
|
|
$ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
|
|
}
|
|
$ok18 = $j eq sprintf("%g:%g", $h, $i);
|
|
}
|
|
else {
|
|
use locale ':not_characters';
|
|
|
|
my ($x, $y) = (1.23, 1.23);
|
|
$a = "$x";
|
|
printf ''; # printf used to reset locale to "C"
|
|
$b = "$y";
|
|
$ok1 = $a eq $b;
|
|
|
|
$c = "$x";
|
|
my $z = sprintf ''; # sprintf used to reset locale to "C"
|
|
$d = "$y";
|
|
$ok2 = $c eq $d;
|
|
{
|
|
use warnings;
|
|
my $w = 0;
|
|
local $SIG{__WARN__} =
|
|
sub {
|
|
print "# @_\n";
|
|
$w++;
|
|
};
|
|
$ok3 = $c == 1.23;
|
|
$ok4 = $c == $x;
|
|
$ok5 = $c == $d;
|
|
{
|
|
no locale;
|
|
$e = "$x";
|
|
|
|
$ok6 = $e == 1.23;
|
|
$ok7 = $e == $x;
|
|
$ok8 = $e == $c;
|
|
}
|
|
|
|
$f = "1.23";
|
|
$g = 2.34;
|
|
$h = 1.5;
|
|
$i = 1.25;
|
|
$j = "$h:$i";
|
|
|
|
$ok9 = $f == 1.23;
|
|
$ok10 = $f == $x;
|
|
$ok11 = $f == $c;
|
|
$ok12 = abs(($f + $g) - 3.57) < 0.01;
|
|
$ok13 = $w == 0;
|
|
|
|
# Look for non-ASCII error messages, and verify that the first
|
|
# such is in UTF-8 (the others almost certainly will be like the
|
|
# first). This is only done if the current locale has LC_MESSAGES
|
|
$ok14 = 1;
|
|
$ok14_5 = 1;
|
|
if ( locales_enabled('LC_MESSAGES')
|
|
&& setlocale(&POSIX::LC_MESSAGES, $Locale))
|
|
{
|
|
foreach my $err (keys %!) {
|
|
use Errno;
|
|
$! = eval "&Errno::$err"; # Convert to strerror() output
|
|
my $errnum = 0+$!;
|
|
my $strerror = "$!";
|
|
if ("$strerror" =~ /\P{ASCII}/) {
|
|
$ok14 = utf8::is_utf8($strerror);
|
|
no locale;
|
|
$ok14_5 = "$!" !~ /\P{ASCII}/;
|
|
debug( disp_str(
|
|
"non-ASCII \$! for error $errnum='$strerror'"))
|
|
if ! $ok14_5;
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
|
|
# Similarly, we verify that a non-ASCII radix is in UTF-8. This
|
|
# also catches if there is a disparity between sprintf and
|
|
# stringification.
|
|
|
|
my $string_g = "$g";
|
|
my $sprintf_g = sprintf("%g", $g);
|
|
|
|
$ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
|
|
$ok16 = $sprintf_g eq $string_g;
|
|
}
|
|
{
|
|
no locale;
|
|
$ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
|
|
}
|
|
$ok18 = $j eq sprintf("%g:%g", $h, $i);
|
|
}
|
|
|
|
$ok19 = $ok20 = 1;
|
|
if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by
|
|
# :not_characters
|
|
my @times = CORE::localtime();
|
|
|
|
use locale;
|
|
$ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
|
|
my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times);
|
|
debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
|
|
|
|
# If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and
|
|
# not UTF-8 if the locale isn't UTF-8.
|
|
$ok20 = $date =~ / ^ \p{ASCII}+ $ /x
|
|
|| $is_utf8_locale == utf8::is_utf8($date);
|
|
}
|
|
|
|
$ok21 = 1;
|
|
if (locales_enabled('LC_MESSAGES')) {
|
|
foreach my $err (keys %!) {
|
|
no locale;
|
|
use Errno;
|
|
$! = eval "&Errno::$err"; # Convert to strerror() output
|
|
my $strerror = "$!";
|
|
if ($strerror =~ /\P{ASCII}/) {
|
|
$ok21 = 0;
|
|
debug(disp_str("non-ASCII strerror=$strerror"));
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok1);
|
|
$test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
|
|
my $first_a_test = $locales_test_number;
|
|
|
|
debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok2);
|
|
$test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
|
|
|
|
my $first_c_test = $locales_test_number;
|
|
|
|
$test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
|
|
report_result($Locale, $locales_test_number, $ok3);
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
$test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
|
|
report_result($Locale, $locales_test_number, $ok4);
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok5);
|
|
$test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok6);
|
|
$test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
|
|
my $first_e_test = $locales_test_number;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok7);
|
|
$test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
|
|
|
|
$test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
|
|
report_result($Locale, $locales_test_number, $ok8);
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok9);
|
|
$test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
my $first_f_test = $locales_test_number;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok10);
|
|
$test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
$test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
|
|
report_result($Locale, $locales_test_number, $ok11);
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok12);
|
|
$test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok13);
|
|
$test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok14);
|
|
$test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok14_5);
|
|
$test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok15);
|
|
$test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok16);
|
|
$test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok17);
|
|
$test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok18);
|
|
$test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok19);
|
|
$test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok20);
|
|
$test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
|
|
$problematical_tests{$locales_test_number} = 1; # This is broken in
|
|
# OS X 10.9.3
|
|
|
|
report_result($Locale, ++$locales_test_number, $ok21);
|
|
$test_names{$locales_test_number} = '"$!" is ASCII-only outside of locale scope';
|
|
|
|
debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
|
|
|
|
# Does taking lc separately differ from taking
|
|
# the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.)
|
|
# The bug was in the caching of the 'o'-magic.
|
|
if (! $is_utf8_locale) {
|
|
use locale;
|
|
|
|
sub lcA {
|
|
my $lc0 = lc $_[0];
|
|
my $lc1 = lc $_[1];
|
|
return $lc0 cmp $lc1;
|
|
}
|
|
|
|
sub lcB {
|
|
return lc($_[0]) cmp lc($_[1]);
|
|
}
|
|
|
|
my $x = "ab";
|
|
my $y = "aa";
|
|
my $z = "AB";
|
|
|
|
report_result($Locale, ++$locales_test_number,
|
|
lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
|
|
lcA($x, $z) == 0 && lcB($x, $z) == 0);
|
|
}
|
|
else {
|
|
use locale ':not_characters';
|
|
|
|
sub lcC {
|
|
my $lc0 = lc $_[0];
|
|
my $lc1 = lc $_[1];
|
|
return $lc0 cmp $lc1;
|
|
}
|
|
|
|
sub lcD {
|
|
return lc($_[0]) cmp lc($_[1]);
|
|
}
|
|
|
|
my $x = "ab";
|
|
my $y = "aa";
|
|
my $z = "AB";
|
|
|
|
report_result($Locale, ++$locales_test_number,
|
|
lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
|
|
lcC($x, $z) == 0 && lcD($x, $z) == 0);
|
|
}
|
|
$test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
|
|
|
|
# Does lc of an UPPER (if different from the UPPER) match
|
|
# case-insensitively the UPPER, and does the UPPER match
|
|
# case-insensitively the lc of the UPPER. And vice versa.
|
|
{
|
|
use locale;
|
|
no utf8;
|
|
my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
|
|
|
|
my @f = ();
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number} = 'Verify case insensitive matching works';
|
|
foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
|
|
if (! $is_utf8_locale) {
|
|
my $y = lc $x;
|
|
next unless uc $y eq $x;
|
|
debug_more( "UPPER=", disp_chars(($x)),
|
|
"; lc=", disp_chars(($y)), "; ",
|
|
"; fc=", disp_chars((fc $x)), "; ",
|
|
disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
|
|
$x =~ /\Q$y/i ? 1 : 0,
|
|
"; ",
|
|
disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
|
|
$y =~ /\Q$x/i ? 1 : 0,
|
|
"\n");
|
|
#
|
|
# If $x and $y contain regular expression characters
|
|
# AND THEY lowercase (/i) to regular expression characters,
|
|
# regcomp() will be mightily confused. No, the \Q doesn't
|
|
# help here (maybe regex engine internal lowercasing
|
|
# is done after the \Q?) An example of this happening is
|
|
# the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
|
|
# the chr(173) (the "[") is the lowercase of the chr(235).
|
|
#
|
|
# Similarly losing EBCDIC locales include cs_cz, cs_CZ,
|
|
# el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
|
|
# et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
|
|
# mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
|
|
# pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
|
|
# sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
|
|
#
|
|
# Similar things can happen even under (bastardised)
|
|
# non-EBCDIC locales: in many European countries before the
|
|
# advent of ISO 8859-x nationally customised versions of
|
|
# ISO 646 were devised, reusing certain punctuation
|
|
# characters for modified characters needed by the
|
|
# country/language. For example, the "|" might have
|
|
# stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
|
|
#
|
|
if ($x =~ $re || $y =~ $re) {
|
|
print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
|
|
next;
|
|
}
|
|
push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
|
|
|
|
# fc is not a locale concept, so Perl uses lc for it.
|
|
push @f, $x unless lc $x eq fc $x;
|
|
}
|
|
else {
|
|
use locale ':not_characters';
|
|
my $y = lc $x;
|
|
next unless uc $y eq $x;
|
|
debug_more( "UPPER=", disp_chars(($x)),
|
|
"; lc=", disp_chars(($y)), "; ",
|
|
"; fc=", disp_chars((fc $x)), "; ",
|
|
disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
|
|
$x =~ /\Q$y/i ? 1 : 0,
|
|
"; ",
|
|
disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
|
|
$y =~ /\Q$x/i ? 1 : 0,
|
|
"\n");
|
|
|
|
push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
|
|
|
|
# The places where Unicode's lc is different from fc are
|
|
# skipped here by virtue of the 'next unless uc...' line above
|
|
push @f, $x unless lc $x eq fc $x;
|
|
}
|
|
}
|
|
|
|
foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
|
|
if (! $is_utf8_locale) {
|
|
my $y = uc $x;
|
|
next unless lc $y eq $x;
|
|
debug_more( "lower=", disp_chars(($x)),
|
|
"; uc=", disp_chars(($y)), "; ",
|
|
"; fc=", disp_chars((fc $x)), "; ",
|
|
disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
|
|
$x =~ /\Q$y/i ? 1 : 0,
|
|
"; ",
|
|
disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
|
|
$y =~ /\Q$x/i ? 1 : 0,
|
|
"\n");
|
|
if ($x =~ $re || $y =~ $re) { # See above.
|
|
print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
|
|
next;
|
|
}
|
|
push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
|
|
|
|
push @f, $x unless lc $x eq fc $x;
|
|
}
|
|
else {
|
|
use locale ':not_characters';
|
|
my $y = uc $x;
|
|
next unless lc $y eq $x;
|
|
debug_more( "lower=", disp_chars(($x)),
|
|
"; uc=", disp_chars(($y)), "; ",
|
|
"; fc=", disp_chars((fc $x)), "; ",
|
|
disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
|
|
$x =~ /\Q$y/i ? 1 : 0,
|
|
"; ",
|
|
disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
|
|
$y =~ /\Q$x/i ? 1 : 0,
|
|
"\n");
|
|
push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
|
|
|
|
push @f, $x unless lc $x eq fc $x;
|
|
}
|
|
}
|
|
report_multi_result($Locale, $locales_test_number, \@f);
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
}
|
|
|
|
# [perl #109318]
|
|
{
|
|
my @f = ();
|
|
++$locales_test_number;
|
|
$test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
|
|
$problematical_tests{$locales_test_number} = 1;
|
|
|
|
my $radix = POSIX::localeconv()->{decimal_point};
|
|
my @nums = (
|
|
"3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
|
|
"-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
|
|
);
|
|
|
|
if (! $is_utf8_locale) {
|
|
use locale;
|
|
for my $num (@nums) {
|
|
push @f, $num
|
|
unless sprintf("%g", $num) =~ /3.+14/;
|
|
}
|
|
}
|
|
else {
|
|
use locale ':not_characters';
|
|
for my $num (@nums) {
|
|
push @f, $num
|
|
unless sprintf("%g", $num) =~ /3.+14/;
|
|
}
|
|
}
|
|
|
|
report_result($Locale, $locales_test_number, @f == 0);
|
|
if (@f) {
|
|
print "# failed $locales_test_number locale '$Locale' numbers @f\n"
|
|
}
|
|
}
|
|
}
|
|
|
|
my $final_locales_test_number = $locales_test_number;
|
|
|
|
# Recount the errors.
|
|
|
|
TEST_NUM:
|
|
foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
|
|
my $has_non_global_failure = $Problem{$test_num}
|
|
|| ! defined $Okay{$test_num}
|
|
|| ! @{$Okay{$test_num}};
|
|
print "not " if %setlocale_failed || $has_non_global_failure;
|
|
print "ok $test_num";
|
|
$test_names{$test_num} = "" unless defined $test_names{$test_num};
|
|
|
|
# If TODO is in the test name, make it thus
|
|
my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//;
|
|
print " $test_names{$test_num}";
|
|
if ($todo) {
|
|
print " # TODO\n";
|
|
}
|
|
elsif (%setlocale_failed || ! $has_non_global_failure) {
|
|
print "\n";
|
|
}
|
|
elsif ($has_non_global_failure) {
|
|
|
|
# If there are any locales that pass this test, or are known-bad, it
|
|
# may be that there are enough passes that we TODO the failure, but
|
|
# only for tests that we have decided can be problematical.
|
|
if ( ($Okay{$test_num} || $Known_bad_locale{$test_num})
|
|
&& grep { $_ == $test_num } keys %problematical_tests)
|
|
{
|
|
# Don't count the known-bad failures when calculating the
|
|
# percentage that fail.
|
|
my $known_failures = (exists $Known_bad_locale{$test_num})
|
|
? scalar(keys $Known_bad_locale{$test_num}->%*)
|
|
: 0;
|
|
my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
|
|
- $known_failures;
|
|
|
|
# Specially handle failures where only known-bad locales fail.
|
|
# This makes the diagnositics clearer.
|
|
if ($adjusted_failures <= 0) {
|
|
print " # TODO fails only on known bad locales: ",
|
|
join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
|
|
next TEST_NUM;
|
|
}
|
|
|
|
# Round to nearest .1%
|
|
my $percent_fail = (int(.5 + (1000 * $adjusted_failures
|
|
/ scalar(@Locale))))
|
|
/ 10;
|
|
$todo = $percent_fail < $acceptable_failure_percentage;
|
|
print " # TODO" if $todo;
|
|
print "\n";
|
|
|
|
if ($debug) {
|
|
print "# $percent_fail% of locales (",
|
|
scalar(keys $Problem{$test_num}->%*),
|
|
" of ",
|
|
scalar(@Locale),
|
|
") fail the above test (TODO cut-off is ",
|
|
$acceptable_failure_percentage,
|
|
"%)\n";
|
|
}
|
|
elsif ($todo) {
|
|
print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
|
|
print "# pass the above test, so it is likely that the failures\n";
|
|
print "# are errors in the locale definitions. The test is marked TODO, as the\n";
|
|
print "# problem is not likely to be Perl's\n";
|
|
}
|
|
}
|
|
|
|
if ($debug) {
|
|
print "# The code points that had this failure are given above. Look for lines\n";
|
|
print "# that match 'failed $test_num'\n";
|
|
}
|
|
else {
|
|
print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
|
|
print "# Then look at that output for lines that match 'failed $test_num'\n";
|
|
}
|
|
if (defined $not_necessarily_a_problem_test_number
|
|
&& $test_num == $not_necessarily_a_problem_test_number)
|
|
{
|
|
print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
|
|
print "# It usually indicates a problem in the environment,\n";
|
|
print "# not in Perl itself.\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
$test_num = $final_locales_test_number;
|
|
|
|
if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) {
|
|
# perl #115808
|
|
use warnings;
|
|
my $warned = 0;
|
|
local $SIG{__WARN__} = sub {
|
|
$warned = $_[0] =~ /uninitialized/;
|
|
};
|
|
my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
|
|
ok($warned, "variable set to setlocale(\"invalid locale name\") is considered uninitialized");
|
|
}
|
|
|
|
# Test that tainting and case changing works on utf8 strings. These tests are
|
|
# placed last to avoid disturbing the hard-coded test numbers that existed at
|
|
# the time these were added above this in this file.
|
|
# This also tests that locale overrides unicode_strings in the same scope for
|
|
# non-utf8 strings.
|
|
setlocale(&POSIX::LC_ALL, "C");
|
|
{
|
|
use locale;
|
|
use feature 'unicode_strings';
|
|
|
|
foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
|
|
my @list; # List of code points to test for $function
|
|
|
|
# Used to calculate the changed case for ASCII characters by using the
|
|
# ord, instead of using one of the functions under test.
|
|
my $ascii_case_change_delta;
|
|
my $above_latin1_case_change_delta; # Same for the specific ords > 255
|
|
# that we use
|
|
|
|
# We test an ASCII character, which should change case;
|
|
# a Latin1 character, which shouldn't change case under this C locale,
|
|
# an above-Latin1 character that when the case is changed would cross
|
|
# the 255/256 boundary, so doesn't change case
|
|
# (the \x{149} is one of these, but changes into 2 characters, the
|
|
# first one of which doesn't cross the boundary.
|
|
# the final one in each list is an above-Latin1 character whose case
|
|
# does change. The code below uses its position in its list as a
|
|
# marker to indicate that it, unlike the other code points above
|
|
# ASCII, has a successful case change
|
|
#
|
|
# All casing operations under locale (but not :not_characters) should
|
|
# taint
|
|
if ($function =~ /^u/) {
|
|
@list = ("", "a",
|
|
chr(utf8::unicode_to_native(0xe0)),
|
|
chr(utf8::unicode_to_native(0xff)),
|
|
"\x{fb00}", "\x{149}", "\x{101}");
|
|
$ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
|
|
$above_latin1_case_change_delta = -1;
|
|
}
|
|
else {
|
|
@list = ("", "A",
|
|
chr(utf8::unicode_to_native(0xC0)),
|
|
"\x{17F}", "\x{100}");
|
|
$ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
|
|
$above_latin1_case_change_delta = +1;
|
|
}
|
|
foreach my $is_utf8_locale (0 .. 1) {
|
|
foreach my $j (0 .. $#list) {
|
|
my $char = $list[$j];
|
|
|
|
for my $encoded_in_utf8 (0 .. 1) {
|
|
my $should_be;
|
|
my $changed;
|
|
if (! $is_utf8_locale) {
|
|
no warnings 'locale';
|
|
$should_be = ($j == $#list)
|
|
? chr(ord($char) + $above_latin1_case_change_delta)
|
|
: (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
|
|
? $char
|
|
: chr(ord($char) + $ascii_case_change_delta);
|
|
|
|
# This monstrosity is in order to avoid using an eval,
|
|
# which might perturb the results
|
|
$changed = ($function eq "uc")
|
|
? uc($char)
|
|
: ($function eq "ucfirst")
|
|
? ucfirst($char)
|
|
: ($function eq "lc")
|
|
? lc($char)
|
|
: ($function eq "lcfirst")
|
|
? lcfirst($char)
|
|
: ($function eq "fc")
|
|
? fc($char)
|
|
: die("Unexpected function \"$function\"");
|
|
}
|
|
else {
|
|
{
|
|
no locale;
|
|
|
|
# For utf8-locales the case changing functions
|
|
# should work just like they do outside of locale.
|
|
# Can use eval here because not testing it when
|
|
# not in locale.
|
|
$should_be = eval "$function('$char')";
|
|
die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
|
|
|
|
}
|
|
use locale ':not_characters';
|
|
$changed = ($function eq "uc")
|
|
? uc($char)
|
|
: ($function eq "ucfirst")
|
|
? ucfirst($char)
|
|
: ($function eq "lc")
|
|
? lc($char)
|
|
: ($function eq "lcfirst")
|
|
? lcfirst($char)
|
|
: ($function eq "fc")
|
|
? fc($char)
|
|
: die("Unexpected function \"$function\"");
|
|
}
|
|
ok($changed eq $should_be,
|
|
"$function(\"$char\") in C locale "
|
|
. (($is_utf8_locale)
|
|
? "(use locale ':not_characters'"
|
|
: "(use locale")
|
|
. (($encoded_in_utf8)
|
|
? "; encoded in utf8)"
|
|
: "; not encoded in utf8)")
|
|
. " should be \"$should_be\", got \"$changed\"");
|
|
|
|
# Tainting shouldn't happen for use locale :not_character
|
|
# (a utf8 locale)
|
|
(! $is_utf8_locale)
|
|
? check_taint($changed)
|
|
: check_taint_not($changed);
|
|
|
|
# Use UTF-8 next time through the loop
|
|
utf8::upgrade($char);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
# Give final advice.
|
|
|
|
my $didwarn = 0;
|
|
|
|
foreach ($first_locales_test_number..$final_locales_test_number) {
|
|
if ($Problem{$_}) {
|
|
my @f = sort keys %{ $Problem{$_} };
|
|
|
|
# Don't list the failures caused by known-bad locales.
|
|
if (exists $known_bad_locales{$os}) {
|
|
@f = grep { $_ !~ $known_bad_locales{$os} } @f;
|
|
next unless @f;
|
|
}
|
|
my $f = join(" ", @f);
|
|
$f =~ s/(.{50,60}) /$1\n#\t/g;
|
|
print
|
|
"#\n",
|
|
"# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
|
|
"#\t", $f, "\n#\n",
|
|
"# on your system may have errors because the locale test $_\n",
|
|
"# \"$test_names{$_}\"\n",
|
|
"# failed in ", (@f == 1 ? "that locale" : "those locales"),
|
|
".\n";
|
|
print <<EOW;
|
|
#
|
|
# If your users are not using these locales you are safe for the moment,
|
|
# but please report this failure first to perlbug\@perl.org using the
|
|
# perlbug script (as described in the INSTALL file) so that the exact
|
|
# details of the failures can be sorted out first and then your operating
|
|
# system supplier can be alerted about these anomalies.
|
|
#
|
|
EOW
|
|
$didwarn = 1;
|
|
}
|
|
}
|
|
|
|
# Tell which locales were okay and which were not.
|
|
|
|
if ($didwarn) {
|
|
my (@s, @F);
|
|
|
|
foreach my $l (@Locale) {
|
|
my $p = 0;
|
|
if ($setlocale_failed{$l}) {
|
|
$p++;
|
|
}
|
|
else {
|
|
foreach my $t
|
|
($first_locales_test_number..$final_locales_test_number)
|
|
{
|
|
$p++ if $Problem{$t}{$l};
|
|
}
|
|
}
|
|
push @s, $l if $p == 0;
|
|
push @F, $l unless $p == 0;
|
|
}
|
|
|
|
if (@s) {
|
|
my $s = join(" ", @s);
|
|
$s =~ s/(.{50,60}) /$1\n#\t/g;
|
|
|
|
print
|
|
"# The following locales\n#\n",
|
|
"#\t", $s, "\n#\n",
|
|
"# tested okay.\n#\n",
|
|
} else {
|
|
print "# None of your locales were fully okay.\n";
|
|
}
|
|
|
|
if (@F) {
|
|
my $F = join(" ", @F);
|
|
$F =~ s/(.{50,60}) /$1\n#\t/g;
|
|
|
|
my $details = "";
|
|
unless ($debug) {
|
|
$details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
|
|
}
|
|
elsif ($debug == 1) {
|
|
$details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
|
|
}
|
|
|
|
print
|
|
"# The following locales\n#\n",
|
|
"#\t", $F, "\n#\n",
|
|
"# had problems.\n#\n",
|
|
$details;
|
|
} else {
|
|
print "# None of your locales were broken.\n";
|
|
}
|
|
}
|
|
|
|
if (exists $known_bad_locales{$os} && ! %Known_bad_locale) {
|
|
$test_num++;
|
|
print "ok $test_num $^O no longer has known bad locales # TODO\n";
|
|
}
|
|
|
|
print "1..$test_num\n";
|
|
|
|
# eof
|