php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
generate_name_perfect_hash_table.php
Go to the documentation of this file.
1<?php
2
/**
 * Reads the encoding struct names listed in the `mbfl_encoding_ptr_list`
 * initializer of mbfl_encoding.c (expected next to this script).
 *
 * Every entry has its leading `&` and surrounding whitespace stripped, and
 * the final entry (the NULL terminator of the C array) is dropped.
 *
 * @return string[] Encoding struct names, in the order they appear in the C array.
 * @throws RuntimeException If the file cannot be read or the array cannot be located.
 */
function read_encoding_pointer_array(): array {
    // read the encoding pointer array
    $path = __DIR__ . '/mbfl_encoding.c';
    $file_content = file_get_contents($path);
    if ($file_content === false) {
        throw new RuntimeException("Unable to read $path");
    }

    $pattern = '/static const mbfl_encoding \*mbfl_encoding_ptr_list\[\][\s\S]*?\{([^}]*)\};/';
    // Checked explicitly rather than with assert(): assertions are skipped
    // entirely when zend.assertions is disabled.
    if (preg_match($pattern, $file_content, $matches) !== 1) {
        throw new RuntimeException("Could not locate mbfl_encoding_ptr_list in $path");
    }

    // Accept both LF and CRLF line endings between the entries.
    $array = preg_split('/,\r?\n/', $matches[1]);
    $array = array_map(function ($item) {
        return trim($item, "&\r\n\t ");
    }, $array);
    array_pop($array); // Remove NULL
    return $array;
}
16
/**
 * Collects the encoding names of the requested mbfl_encoding structs defined in one C file.
 *
 * For every `const mbfl_encoding <struct> = { ... }` definition whose struct
 * name is listed in $struct_names, the second initializer field (with
 * surrounding whitespace and double quotes stripped) is stored in $result
 * under the struct name. Definitions with fewer than two fields are skipped.
 *
 * @param array    $result       Map of struct name => encoding name, filled in place.
 * @param string[] $struct_names Struct identifiers to look for.
 * @param string   $file_path    Path of the C source file to scan.
 */
function search_struct_in_file(array &$result, $struct_names, $file_path)
{
    $fileContent = file_get_contents($file_path);
    if ($fileContent === false) {
        // Unreadable file: nothing to collect (file_get_contents() already emitted a warning).
        return;
    }

    // Quote each name so it is always matched literally inside the alternation.
    $alternatives = implode('|', array_map(function ($struct_name) {
        return preg_quote($struct_name, '/');
    }, $struct_names));
    $pattern = '/const mbfl_encoding\s+(' . $alternatives . ')\s* = {([^}]*)}/';
    preg_match_all($pattern, $fileContent, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        $current_struct_name = $match[1];
        $fields = explode(',', $match[2]);
        // Note: name is the second field
        if (!isset($fields[1])) {
            continue;
        }
        $result[$current_struct_name] = trim($fields[1], " \r\n\t\"");
    }
}
30
/**
 * Runs search_struct_in_file() over the C sources next to this script's
 * directory and returns the accumulated struct name => encoding name map.
 *
 * The files scanned are those matching `<this dir>/../<any dir>/*.c`.
 *
 * @param string[] $struct_names Struct identifiers to look for.
 * @return array Map of struct name => encoding name.
 */
function search_struct_in_dir($struct_names): array
{
    $result = [];
    // PHP's glob() has no recursive "**": it behaves like a single "*" here,
    // so exactly one directory level below the parent directory is scanned.
    // glob() returns false on error; treat that the same as "no files".
    $files = glob(__DIR__ . "/../**/*.c") ?: [];
    foreach ($files as $file) {
        search_struct_in_file($result, $struct_names, $file);
    }
    return $result;
}
39
// Struct names from mbfl_encoding_ptr_list, and the encoding names found for them in the C sources.
// NOTE(review): these two statements and the `$fixed_encodings = [` opener were missing from this
// listing (its numbering skips them); restored from how the variables are used below - confirm upstream.
$encoding_pointer_array = read_encoding_pointer_array();
$encoding_pointer_array_name_mapping = search_struct_in_dir($encoding_pointer_array);

// The single byte encodings are generated and cannot be found in dedicated generated structs
$fixed_encodings = [
    'mbfl_encoding_cp1251' => 'Windows-1251',
    'mbfl_encoding_cp1252' => 'Windows-1252',
    'mbfl_encoding_cp1254' => 'Windows-1254',
    'mbfl_encoding_8859_1' => 'ISO-8859-1',
    'mbfl_encoding_8859_2' => 'ISO-8859-2',
    'mbfl_encoding_8859_3' => 'ISO-8859-3',
    'mbfl_encoding_8859_4' => 'ISO-8859-4',
    'mbfl_encoding_8859_5' => 'ISO-8859-5',
    'mbfl_encoding_8859_6' => 'ISO-8859-6',
    'mbfl_encoding_8859_7' => 'ISO-8859-7',
    'mbfl_encoding_8859_8' => 'ISO-8859-8',
    'mbfl_encoding_8859_9' => 'ISO-8859-9',
    'mbfl_encoding_8859_10' => 'ISO-8859-10',
    'mbfl_encoding_8859_13' => 'ISO-8859-13',
    'mbfl_encoding_8859_14' => 'ISO-8859-14',
    'mbfl_encoding_8859_15' => 'ISO-8859-15',
    'mbfl_encoding_8859_16' => 'ISO-8859-16',
    'mbfl_encoding_cp866' => 'CP866',
    'mbfl_encoding_cp850' => 'CP850',
    'mbfl_encoding_koi8r' => 'KOI8-R',
    'mbfl_encoding_koi8u' => 'KOI8-U',
    'mbfl_encoding_armscii8' => 'ArmSCII-8',
    'mbfl_encoding_ascii' => 'ASCII',
];
69
// Add the fixed encodings
foreach ($fixed_encodings as $encoding_pointer => $encoding_name) {
    $encoding_pointer_array_name_mapping[$encoding_pointer] = $encoding_name;
}

// Consistency check: all of the encoding pointer array entries should be found.
// Checked explicitly rather than with assert(), which is skipped when
// zend.assertions is disabled and would let a missing name go unnoticed.
foreach ($encoding_pointer_array as $encoding_pointer) {
    if (!isset($encoding_pointer_array_name_mapping[$encoding_pointer])) {
        throw new RuntimeException("Missing entry for $encoding_pointer");
    }
}
79
// Encoding names in the same order as the encoding pointer array.
// NOTE(review): only the `return` line of this closure survived in this listing; the surrounding
// array_map() call is restored from how $ordered_name_list is used below - confirm upstream.
$ordered_name_list = array_map(function (string $encoding_pointer) use ($encoding_pointer_array_name_mapping): string {
    return $encoding_pointer_array_name_mapping[$encoding_pointer];
}, $encoding_pointer_array);
83
// Write out ordered name list, and invoke gperf for computing the perfect hash table
file_put_contents(__DIR__ . '/encodings.txt', implode("\n", $ordered_name_list));
// passthru() prints gperf's stdout directly, so buffer it to capture the generated C code.
ob_start();
passthru('gperf ' . escapeshellarg(__DIR__ . '/encodings.txt') . ' --readonly-tables --null-strings --ignore-case -m 1000');
// Collect the buffered gperf output; the parsing below reads it from $output.
// NOTE(review): this statement was missing from this listing (its numbering skips it) - confirm upstream.
$output = ob_get_clean();
// The name list is only a temporary input file for gperf; removal is best-effort.
@unlink(__DIR__ . '/encodings.txt');
90
// Find asso_values array in $output
$pattern = '/static const unsigned char asso_values\[\] =([^}]*)\};/';
// Checked explicitly rather than with assert(), which is skipped when zend.assertions is disabled.
if (preg_match($pattern, $output, $matches) !== 1) {
    throw new RuntimeException('Could not find the asso_values array in the gperf output');
}
// Strip the opening brace and surrounding whitespace, leaving only the initializer values.
$asso_values = trim($matches[1], "\t \n{");
echo "===--- Copy and paste the following values in the asso_values array in mbfl_encoding.c ---===\n";
echo $asso_values, "\n";
98
// Find word_list array in $output
$pattern = '/static const char \* const wordlist\[\] =([^}]*)\};/';
// Checked explicitly rather than with assert(), which is skipped when zend.assertions is disabled.
if (preg_match($pattern, $output, $matches) !== 1) {
    throw new RuntimeException('Could not find the wordlist array in the gperf output');
}
$word_list = trim($matches[1], "\t \n{");
// Empty hash slots become -1; every quoted encoding name becomes its index in the encoding pointer array.
$word_list = str_replace('(char*)0', '-1', $word_list);
foreach ($encoding_pointer_array_name_mapping as $key => $value)
{
    // NOTE(review): the lookup that defines $index was missing from this listing (its numbering
    // skips it); restored from how $index is used on the next statement - confirm upstream.
    $index = array_search($key, $encoding_pointer_array, true);
    if ($index === false) {
        // Not part of the pointer array, so its name cannot occur in the word list either.
        continue;
    }
    $word_list = str_replace("\"$value\"", (string) $index, $word_list);
}

echo "===--- Copy and paste the following values in the mbfl_encoding_ptr_list_after_hashing array in mbfl_encoding.c ---===\n";
echo $word_list, "\n";
unlink(string $filename, $context=null)
implode(string|array $separator, ?array $array=null)
trim(string $string, string $characters=" \n\r\t\v\0")
file_get_contents(string $filename, bool $use_include_path=false, $context=null, int $offset=0, ?int $length=null)
explode(string $separator, string $string, int $limit=PHP_INT_MAX)
str_replace(array|string $search, array|string $replace, string|array $subject, &$count=null)
passthru(string $command, &$result_code=null)
file_put_contents(string $filename, mixed $data, int $flags=0, $context=null)
array_map(?callable $callback, array $array, array ... $arrays)
array_search(mixed $needle, array $haystack, bool $strict=false)
glob(string $pattern, int $flags=0)
array_pop(array &$array)
assert(mixed $assertion, Throwable|string|null $description=null)
ob_start($callback=null, int $chunk_size=0, int $flags=PHP_OUTPUT_HANDLER_STDFLAGS)
escapeshellarg(string $arg)
$file
Definition encryption.php:9
foreach(explode("\n", $input) as $line) $result
foreach( $fixed_encodings as $encoding_pointer=> $encoding_name) foreach($encoding_pointer_array as $encoding_pointer) $ordered_name_list
search_struct_in_dir($struct_names)
search_struct_in_file(array &$result, $struct_names, $file_path)
#define PREG_SET_ORDER
Definition php_pcre.c:26
preg_match_all(string $pattern, string $subject, &$matches=null, int $flags=0, int $offset=0)
preg_match(string $pattern, string $subject, &$matches=null, int $flags=0, int $offset=0)
$array
Definition test.php:58