php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
uctest.php
Go to the documentation of this file.
1#!/usr/bin/env php
3
// The three data files are looked up in the directory that contains this script.
$dir = __DIR__;
$unicodeDataFile = $dir . '/UnicodeData.txt';
$caseFoldingFile = $dir . '/CaseFolding.txt';
$specialCasingFile = $dir . '/SpecialCasing.txt';

// Stop before running anything if one of the inputs is missing. $files has to be
// defined here: iterating an undefined variable would skip the check entirely.
$files = [$unicodeDataFile, $caseFoldingFile, $specialCasingFile];
foreach ($files as $file) {
    if (!file_exists($file)) {
        echo "File $file does not exist.\n";
        return;
    }
}
16
20
/**
 * Lazily yields the records of a semicolon-separated data file.
 *
 * On each line everything from the first '#' onwards is discarded as a comment.
 * Lines that are empty after trimming are skipped; every other line is split on
 * ';' and each resulting field is trimmed.
 *
 * @param string $input Complete contents of the data file.
 * @return Generator Yields one list of trimmed string fields per record.
 */
function parseDataFile(string $input) {
    foreach (explode("\n", $input) as $rawLine) {
        // Keep only the part in front of a comment, if the line has one.
        $commentStart = strpos($rawLine, '#');
        $record = $commentStart === false ? $rawLine : substr($rawLine, 0, $commentStart);

        $record = trim($record);
        if ($record !== '') {
            yield array_map('trim', explode(';', $record));
        }
    }
}
39
/**
 * Converts a space-separated list of hexadecimal code points into integers.
 *
 * Empty tokens are ignored, so an empty field yields an empty list and repeated
 * spaces do not inject a bogus code point 0 into the result.
 *
 * @param string $strCodes Hexadecimal code points separated by spaces, e.g. "0046 0066".
 * @return int[] The code points in input order.
 */
function parseCodes(string $strCodes) : array {
    $codes = [];
    foreach (explode(' ', $strCodes) as $strCode) {
        // explode() returns '' for an empty input and between adjacent separators;
        // intval('', 16) would turn each of those into 0.
        if ($strCode === '') {
            continue;
        }
        $codes[] = intval($strCode, 16);
    }
    return $codes;
}
47
/**
 * Converts one code point with mb_convert_case() and reports a mismatch.
 *
 * The expected string is the concatenation of the characters for $newCodes.
 * Nothing is printed when the conversion result is identical to it; otherwise a
 * line of the form "<type>: <actual> != <expected>" is echoed.
 *
 * @param int   $type     Case mode handed to mb_convert_case().
 * @param int   $origCode Code point to convert.
 * @param int[] $newCodes Code points of the expected result.
 */
function testCaseMap($type, int $origCode, array $newCodes) {
    $newStr = implode('', array_map('mb_chr', $newCodes));
    $mbNewStr = mb_convert_case(mb_chr($origCode), $type);

    if ($mbNewStr === $newStr) {
        return;
    }

    echo "$type: $mbNewStr != $newStr\n";
}
60
/**
 * Checks a mapping from one code point to exactly one code point.
 *
 * A $newCode of 0 is treated as "no mapping": the character is then expected to
 * convert to itself.
 *
 * @param int $type     Case mode handed on to testCaseMap().
 * @param int $origCode Code point to convert.
 * @param int $newCode  Expected code point, or 0 when the character is unchanged.
 */
function testSimpleCaseMap($type, int $origCode, int $newCode) {
    testCaseMap($type, $origCode, [$newCode ?: $origCode]);
}
68
/**
 * Checks the simple upper- and lower-case conversions against UnicodeData-style records.
 *
 * Each record must have 15 fields. Field 0 is the code point; fields 12 and 13 are
 * its upper- and lower-case mappings, all in hexadecimal.
 *
 * @param string $input Complete contents of the data file.
 */
function testUnicodeData(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) === 15);

        // An empty mapping field parses to 0, which testSimpleCaseMap() treats as
        // "the character maps to itself".
        $code = intval($fields[0], 16);
        $upperCase = intval($fields[12], 16);
        $lowerCase = intval($fields[13], 16);
        testSimpleCaseMap(MB_CASE_UPPER_SIMPLE, $code, $upperCase);
        testSimpleCaseMap(MB_CASE_LOWER_SIMPLE, $code, $lowerCase);

        // Unfortunately MB_CASE_TITLE does not actually return the title case, even when
        // passed only a single character. It does ad-hoc magic based on the character
        // class, so that certain characters, such as roman numerals or circled characters
        // will not be title-cased. The title-case mapping (field 14, falling back to the
        // upper-case mapping) is therefore not checked with MB_CASE_TITLE_SIMPLE here.

        // NOTE(review): the results of these two calls used to be collected into arrays
        // that were never read. The arrays are gone; the calls are kept so that every
        // code point still goes through the full upper-case and fold conversions.
        // Remove them if that was not intended.
        $chr = mb_chr($code);
        mb_strtoupper($chr);
        mb_convert_case($chr, MB_CASE_FOLD);
    }
}
96
/**
 * Checks case folding against CaseFolding-style records.
 *
 * Each record must have 4 fields: code point, status, mapping and a trailing field.
 * Status 'C' and 'S' records are checked as single-code-point mappings with
 * MB_CASE_FOLD_SIMPLE, status 'F' records as code point sequences with
 * MB_CASE_FOLD. Records with any other status are ignored.
 *
 * @param string $input Complete contents of the data file.
 */
function testCaseFolding(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) == 4);

        $code = intval($fields[0], 16);
        switch ($fields[1]) {
            case 'C':
            case 'S':
                testSimpleCaseMap(MB_CASE_FOLD_SIMPLE, $code, intval($fields[2], 16));
                break;

            case 'F':
                testCaseMap(MB_CASE_FOLD, $code, parseCodes($fields[2]));
                break;
        }
    }
}
112
/**
 * Checks the full lower-, upper- and title-case conversions against
 * SpecialCasing-style records.
 *
 * Each record needs at least 5 fields: code point, lower, title and upper
 * mappings (space-separated hexadecimal code points) and a condition. Records
 * with a non-empty condition are skipped.
 *
 * @param string $input Complete contents of the data file.
 */
function testSpecialCasing(string $input) {
    foreach (parseDataFile($input) as $fields) {
        assert(count($fields) >= 5);

        $code = intval($fields[0], 16);
        [$lower, $title, $upper] = array_map('parseCodes', [$fields[1], $fields[2], $fields[3]]);

        // We don't support conditional mappings, so only unconditional records are checked.
        if (!$fields[4]) {
            $expectations = [
                [MB_CASE_LOWER, $lower],
                [MB_CASE_UPPER, $upper],
                [MB_CASE_TITLE, $title],
            ];
            foreach ($expectations as [$mode, $expected]) {
                testCaseMap($mode, $code, $expected);
            }
        }
    }
}
trim(string $string, string $characters=" \n\r\t\v\0")
file_exists(string $filename)
file_get_contents(string $filename, bool $use_include_path=false, $context=null, int $offset=0, ?int $length=null)
explode(string $separator, string $string, int $limit=PHP_INT_MAX)
intval(mixed $value, int $base=10)
strpos(string $haystack, string $needle, int $offset=0)
array_map(?callable $callback, array $array, array ... $arrays)
count(Countable|array $value, int $mode=COUNT_NORMAL)
assert(mixed $assertion, Throwable|string|null $description=null)
substr(string $string, int $offset, ?int $length=null)
$file
Definition encryption.php:9
const MB_CASE_LOWER
mb_strtoupper(string $string, ?string $encoding=null)
mb_convert_case(string $string, int $mode, ?string $encoding=null)
const MB_CASE_FOLD_SIMPLE
const MB_CASE_FOLD
mb_chr(int $codepoint, ?string $encoding=null)
const MB_CASE_TITLE
const MB_CASE_UPPER_SIMPLE
const MB_CASE_LOWER_SIMPLE
const MB_CASE_UPPER
$specialCasingFile
Definition ucgendat.php:55
if($argc< 2) $dir
Definition ucgendat.php:52
$unicodeDataFile
Definition ucgendat.php:53
$files
Definition ucgendat.php:59
$caseFoldingFile
Definition ucgendat.php:54
testSpecialCasing(string $input)
Definition uctest.php:113
testSimpleCaseMap($type, int $origCode, int $newCode)
Definition uctest.php:61
parseCodes(string $strCodes)
Definition uctest.php:40
testUnicodeData(string $input)
Definition uctest.php:69
testCaseFolding(string $input)
Definition uctest.php:97
testCaseMap($type, int $origCode, array $newCodes)
Definition uctest.php:48
parseDataFile(string $input)
Definition uctest.php:21
error_reporting(?int $error_level=null)
#define E_ALL
Definition zend_errors.h:43