php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
html_table_gen.php
Go to the documentation of this file.
1#!/usr/bin/env php
2<?php
3/*
4 +----------------------------------------------------------------------+
5 | Copyright (c) The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | https://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Gustavo Lopes <cataphract@php.net> |
16 +----------------------------------------------------------------------+
17*/
18
19/* This file prints to stdout the contents of ext/standard/html_tables.h */
20/* put together with glue; have patience */
21
22$t = <<<CODE
23/*
24 +----------------------------------------------------------------------+
25 | Copyright (c) The PHP Group |
26 +----------------------------------------------------------------------+
27 | This source file is subject to version 3.01 of the PHP license, |
28 | that is bundled with this package in the file LICENSE, and is |
29 | available through the world-wide-web at the following url: |
30 | https://www.php.net/license/3_01.txt |
31 | If you did not receive a copy of the PHP license and are unable to |
32 | obtain it through the world-wide-web, please send a note to |
33 | license@php.net so we can mail you a copy immediately. |
34 +----------------------------------------------------------------------+
35*/
36
37#ifndef HTML_TABLES_H
38#define HTML_TABLES_H
39
40/**************************************************************************
41***************************************************************************
42** THIS FILE IS AUTOMATICALLY GENERATED. DO NOT MODIFY IT. **
43***************************************************************************
44** Please change html_tables/html_table_gen.php instead and then **
45** run it in order to generate this file **
46***************************************************************************
47**************************************************************************/
48
52 cs_numelems /* used to count the number of charsets */
53 };
54#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1)
55#define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5)
56#define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5)
57
58static const struct {
59 const char *codeset;
60 uint32_t codeset_len;
62} charset_map[] = {
63 { "ISO-8859-1", sizeof("ISO-8859-1")-1, cs_8859_1 },
64 { "ISO8859-1", sizeof("ISO8859-1")-1, cs_8859_1 },
65 { "ISO-8859-15", sizeof("ISO-8859-15")-1, cs_8859_15 },
66 { "ISO8859-15", sizeof("ISO8859-15")-1, cs_8859_15 },
67 { "utf-8", sizeof("utf-8")-1, cs_utf_8 },
68 { "cp1252", sizeof("cp1252")-1, cs_cp1252 },
69 { "Windows-1252", sizeof("Windows-1252")-1, cs_cp1252 },
70 { "1252", sizeof("1252")-1, cs_cp1252 },
71 { "BIG5", sizeof("BIG5")-1, cs_big5 },
72 { "950", sizeof("950")-1, cs_big5 },
73 { "GB2312", sizeof("GB2312")-1, cs_gb2312 },
74 { "936", sizeof("936")-1, cs_gb2312 },
75 { "BIG5-HKSCS", sizeof("BIG5-HKSCS")-1, cs_big5hkscs },
76 { "Shift_JIS", sizeof("Shift_JIS")-1, cs_sjis },
77 { "SJIS", sizeof("SJIS")-1, cs_sjis },
78 { "932", sizeof("932")-1, cs_sjis },
79 { "SJIS-win", sizeof("SJIS-win")-1, cs_sjis },
80 { "CP932", sizeof("CP932")-1, cs_sjis },
81 { "EUCJP", sizeof("EUCJP")-1, cs_eucjp },
82 { "EUC-JP", sizeof("EUC-JP")-1, cs_eucjp },
83 { "eucJP-win", sizeof("eucJP-win")-1, cs_eucjp },
84 { "KOI8-R", sizeof("KOI8-R")-1, cs_koi8r },
85 { "koi8-ru", sizeof("koi8-ru")-1, cs_koi8r },
86 { "koi8r", sizeof("koi8r")-1, cs_koi8r },
87 { "cp1251", sizeof("cp1251")-1, cs_cp1251 },
88 { "Windows-1251", sizeof("Windows-1251")-1, cs_cp1251 },
89 { "win-1251", sizeof("win-1251")-1, cs_cp1251 },
90 { "iso8859-5", sizeof("iso8859-5")-1, cs_8859_5 },
91 { "iso-8859-5", sizeof("iso-8859-5")-1, cs_8859_5 },
92 { "cp866", sizeof("cp866")-1, cs_cp866 },
93 { "866", sizeof("866")-1, cs_cp866 },
94 { "ibm866", sizeof("ibm866")-1, cs_cp866 },
95 { "MacRoman", sizeof("MacRoman")-1, cs_macroman }
96};
97
98/* longest entity name length excluding & and ; */
99#define LONGEST_ENTITY_LENGTH 31
100
101/* Definitions for mappings *to* Unicode.
102 * The origin charset must have at most 256 code points.
103 * The multi-byte encodings are not supported */
104typedef struct {
105 unsigned short uni_cp[64];
107
108typedef struct {
109 const enc_to_uni_stage2 *inner[4];
110} enc_to_uni;
111
112/* bits 7-8 bits (only single bytes encodings supported )*/
113#define ENT_ENC_TO_UNI_STAGE1(k) ((k & 0xC0) >> 6)
114/* bits 1-6 */
115#define ENT_ENC_TO_UNI_STAGE2(k) ((k) & 0x3F)
116
117
118CODE;
119
120echo $t;
121
/*
 * Single-byte encodings for which "to Unicode" tables are generated.
 * Keys of each entry:
 *   ident     - suffix of the emitted C symbols (enc_to_uni_<ident>, ...);
 *   enumid    - slot of the encoding in the emitted enc_to_uni_index[] array;
 *   enumident - name of the matching cs_* enumerator without the "cs_" prefix;
 *               informational, not read by the code in this script as listed;
 *   name      - human readable name used in the emitted comments;
 *   file      - mapping file, parsed for "0xBYTE<TAB>0xCODEPOINT" lines.
 */
$encodings = array(
    array(
        "ident" => "iso88591",
        "enumid" => 1,
        "name" => "ISO-8859-1",
        "file" => "mappings/8859-1.TXT",
    ),
    array(
        "ident" => "iso88595",
        "enumid" => 5,
        "name" => "ISO-8859-5",
        "file" => "mappings/8859-5.TXT",
    ),
    array(
        "ident" => "iso885915",
        "enumid" => 3,
        "name" => "ISO-8859-15",
        "file" => "mappings/8859-15.TXT",
    ),
    array(
        "ident" => "win1252",
        "enumid" => 2,
        "enumident" => "cp1252",
        "name" => "Windows-1252",
        "file" => "mappings/CP1252.TXT",
    ),
    array(
        "ident" => "win1251",
        "enumid" => 4,
        /* was "cp1252", copied from the entry above; charset_map maps
         * Windows-1251 to cs_cp1251 */
        "enumident" => "cp1251",
        "name" => "Windows-1251",
        "file" => "mappings/CP1251.TXT",
    ),
    array(
        "ident" => "koi8r",
        "enumid" => 8,
        "name" => "KOI8-R",
        "file" => "mappings/KOI8-R.TXT",
    ),
    array(
        "ident" => "cp866",
        "enumid" => 6,
        "name" => "CP-866",
        "file" => "mappings/CP866.TXT",
    ),
    array(
        "ident" => "macroman",
        "enumid" => 7,
        "name" => "MacRoman",
        "file" => "mappings/ROMAN.TXT",
    ),
);
174
/*
 * Emit, for every encoding in $encodings, the two-stage "to Unicode" lookup
 * tables: four 64-entry stage 2 tables (selected by the two high bits of the
 * byte) and one stage 1 table holding pointers to them.
 *
 * A stage 2 table whose contents equal one that was already emitted is not
 * emitted again; the stage 1 table points at the existing symbol instead.
 * The symbol of every emitted table is recorded next to its contents, so the
 * reference is correct whichever encoding (and slot) emitted it first.
 */
$prevStage2 = array();        /* contents of every stage 2 table emitted so far */
$prevStage2Symbols = array(); /* C symbol of each of them, same keys */

foreach ($encodings as $e) {
    echo "/* {{{ Mappings *to* Unicode for {$e['name']} */\n\n";

    /* process file */
    $contents = file_get_contents($e['file']);
    if ($contents === false) {
        /* without the mapping file every entry would silently become 0xFFFF */
        fwrite(STDERR, "html_table_gen: cannot read {$e['file']}\n");
        exit(1);
    }

    /* byte value => Unicode code point; a later line for the same byte wins */
    $mappy = array();
    foreach (explode("\n", $contents) as $l) {
        if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})/i", $l, $matches)) {
            $mappy[hexdec($matches[1])] = hexdec($matches[2]);
        }
    }

    $mstable = array("ident" => $e['ident']);
    /* calculate two-stage tables; NULL marks a byte without mapping */
    for ($i = 0; $i < 4; $i++) {
        for ($j = 0; $j < 64; $j++) {
            $cp = $i << 6 | $j;
            $mstable[$i][$j] = isset($mappy[$cp]) ? $mappy[$cp] : NULL;
        }
    }

    echo "/* {{{ Stage 2 tables for {$e['name']} */\n\n";

    $s2symbols = array();
    for ($i = 0; $i < 4; $i++) {
        /* strict comparison: an unmapped slot (NULL) must not compare equal
         * to a slot mapped to U+0000 */
        $same = array_keys($prevStage2, $mstable[$i], true);
        if ($same !== array()) {
            $s2symbols[$i] = $prevStage2Symbols[$same[0]];
            continue;
        }

        $symbol = "enc_to_uni_s2_{$e['ident']}_" . sprintf("%02X", $i << 6);
        $s2symbols[$i] = $symbol;

        echo "static const enc_to_uni_stage2 {$symbol} = { {\n";
        for ($j = 0; $j < 64; $j++) {
            /* six values per output line */
            if ($j == 0) echo "\t";
            elseif ($j % 6 == 0) echo "\n\t";
            else echo " ";
            if ($mstable[$i][$j] !== NULL)
                echo sprintf("0x%04X,", $mstable[$i][$j]);
            else
                echo "0xFFFF,"; /* special value; indicates no mapping */
        }
        echo "\n} };\n\n";

        $prevStage2[] = $mstable[$i];
        $prevStage2Symbols[] = $symbol;
    }

    echo "/* end of stage 2 tables for {$e['name']} }}} */\n\n";

    echo "/* {{{ Stage 1 table for {$e['name']} */\n";

    echo "static const enc_to_uni enc_to_uni_{$e['ident']} = { {\n",
        "\t&{$s2symbols[0]},\n",
        "\t&{$s2symbols[1]},\n",
        "\t&{$s2symbols[2]},\n",
        "\t&{$s2symbols[3]} }\n",
        "};\n";

    echo "/* end of stage 1 table for {$e['name']} }}} */\n\n";
}
247
/*
 * Emit enc_to_uni_index[]: one initializer per enumid from 0 up to the
 * largest enumid in $encodings. A slot initially holds its own number and is
 * overwritten with the ident of the encoding that claims it; slots that are
 * still numeric afterwards have no table and are emitted as NULL.
 */
$maxencnum = max(array_map(function ($enc) { return $enc['enumid']; }, $encodings));

$slots = range(0, $maxencnum);
foreach ($encodings as $enc) {
    $slots[$enc['enumid']] = $enc['ident'];
}

$rows = '';
foreach ($slots as $slot) {
    $rows .= is_numeric($slot) ? "\tNULL,\n" : "\t&enc_to_uni_$slot,\n";
}

echo "/* {{{ Index of tables for encoding conversion */\n",
    "static const enc_to_uni *const enc_to_uni_index[cs_numelems] = {\n",
    $rows,
    "};\n",
    "/* }}} */\n";
266
/*
 * Print the C declaration of uni_to_enc, the record type (Unicode code point
 * plus single-byte code) of the unimap_<ident>[] tables that the script
 * prints right after this. The heredoc contains no PHP variables; it is
 * echoed verbatim.
 */
267$t = <<<CODE
268
269/* Definitions for mappings *from* Unicode */
270
271typedef struct {
272 unsigned short un_code_point; /* we don't need bigger */
273 unsigned char cs_code; /* currently, we only have maps to single-byte encodings */
274} uni_to_enc;
275
276
277CODE;
278
279echo $t;
280
/*
 * Encodings for which "from Unicode" tables are generated. "range" is the
 * inclusive span of byte values taken from the mapping file; bytes outside
 * of it are left out of the emitted table.
 */
$encodings = [
    [
        "ident" => "iso885915",
        "name"  => "ISO-8859-15",
        "file"  => "mappings/8859-15.TXT",
        "range" => [0xA4, 0xBE],
    ],
    [
        "ident" => "win1252",
        "name"  => "Windows-1252",
        "file"  => "mappings/CP1252.TXT",
        "range" => [0x80, 0x9F],
    ],
    [
        "ident" => "win1251",
        "name"  => "Windows-1251",
        "file"  => "mappings/CP1251.TXT",
        "range" => [0x80, 0xFF],
    ],
    [
        "ident" => "koi8r",
        "name"  => "KOI8-R",
        "file"  => "mappings/KOI8-R.TXT",
        "range" => [0x80, 0xFF],
    ],
    [
        "ident" => "cp866",
        "name"  => "CP-866",
        "file"  => "mappings/CP866.TXT",
        "range" => [0x80, 0xFF],
    ],
    [
        "ident" => "macroman",
        "name"  => "MacRoman",
        "file"  => "mappings/ROMAN.TXT",
        "range" => [0x80, 0xFF],
    ],
];
319
/*
 * Emit one unimap_<ident>[] table per encoding in $encodings: the
 * (code point, byte) pairs of every mapping-file line whose byte lies inside
 * the encoding's "range", ordered by code point. The lower-cased comment of
 * the mapping-file line is copied next to each entry.
 */
foreach ($encodings as $e) {
    echo "/* {{{ Mappings *from* Unicode for {$e['name']} */\n";

    /* process file */
    $contents = file_get_contents($e['file']);
    if ($contents === false) {
        /* without the mapping file an empty table would be emitted silently */
        fwrite(STDERR, "html_table_gen: cannot read {$e['file']}\n");
        exit(1);
    }

    list($firstByte, $lastByte) = $e['range'];

    /* code point => array(byte, comment); a later line for the same code
     * point wins */
    $mappy = array();
    foreach (explode("\n", $contents) as $l) {
        if (!preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches)) {
            continue;
        }
        $byte = hexdec($matches[1]);
        if ($byte >= $firstByte && $byte <= $lastByte) {
            $mappy[hexdec($matches[2])] = array($byte, strtolower(rtrim($matches[3])));
        }
    }
    ksort($mappy);

    echo "static const uni_to_enc unimap_{$e['ident']}[] = {\n";

    foreach ($mappy as $k => $v) {
        echo "\t{ ", sprintf("0x%04X", $k), ", ", sprintf("0x%02X", $v[0]), " },\t/* ",
            $v[1], " */\n";
    }
    echo "};\n";

    /* closes the fold opened above; the marker used to read "{{{ end of",
     * which opened a second fold instead of closing the first */
    echo "/* end of mappings *from* Unicode for {$e['name']} }}} */\n\n";
}
/*
 * State of the entity-table passes. The code from the "again" label down to
 * the if/elseif chain at the end of the script runs once per entity list.
 * That chain loads the next list into $data, advances $pass2
 * (false -> 1 -> 2 -> 3), sets $name/$ident and jumps back to "again".
 *   $data  - raw contents of the entity list being processed;
 *   $pass2 - false during the first (HTML5) pass, then the pass number;
 *   $name  - label used in the emitted comments;
 *   $ident - suffix used in the emitted C symbol names.
 */
352$data = file_get_contents("ents_html5.txt");
353$pass2 = false;
354$name = "HTML5";
355$ident = "html5";
356again:
357
358$t = <<<'CODE'
359/* HTML 5 has many more named entities.
360 * Some of them map to two unicode code points, not one.
361 * We're going to use a three-stage table (with an extra one for the entities
362 * with two code points). */
363
364#define ENT_STAGE1_INDEX(k) (((k) & 0xFFF000) >> 12) /* > 1D, we have no mapping */
365#define ENT_STAGE2_INDEX(k) (((k) & 0xFC0) >> 6)
366#define ENT_STAGE3_INDEX(k) ((k) & 0x3F)
367#define ENT_CODE_POINT_FROM_STAGES(i,j,k) (((i) << 12) | ((j) << 6) | (k))
368
369/* The default entity may be NULL. Binary search is still possible while
370 is senseless as there are just two rows (see also find_entity_for_char()). */
371typedef union {
372 struct {
373 const char *default_entity;
374 unsigned size; /* number of remaining entries in the table */
375 unsigned short default_entity_len;
376 } leading_entry;
377 struct {
378 const char *entity;
379 unsigned second_cp; /* second code point */
380 unsigned short entity_len;
381 } normal_entry;
383
384/* blocks of these should start at code points k where k % 0xFC0 == 0 */
385typedef struct {
386 char ambiguous; /* if 0 look into entity */
387 union {
388 struct {
389 const char *entity; /* may be NULL */
390 unsigned short entity_len;
391 } ent;
392 const entity_multicodepoint_row *multicodepoint_table;
393 } data;
396/* Calculate k & 0x3F Use as offset */
397typedef const entity_stage3_row *entity_stage2_row; /* 64 elements */
399/* Calculate k & 0xFC0 >> 6. Use as offset */
400typedef const entity_stage3_row *const *entity_stage1_row; /* 64 elements */
401
402/* For stage 1, Calculate k & 0xFFF000 >> 3*4.
403 * If larger than 1D, we have no mapping. Otherwise lookup that index */
404
405typedef struct {
406 const entity_stage1_row *ms_table;
407 /* for tables with only basic entities, this member is to be accessed
408 * directly for better performance: */
409 const entity_stage3_row *table;
411
412/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */
414
415CODE;
416
417if (!$pass2)
418 echo $t;
419
/*
 * Parse the entity list in $data. A useful line reads
 *     name <whitespace> codepoint [<one space> second codepoint]
 * with the code points in hexadecimal; any other line is ignored.
 * Each entry of $dp is array(name, codepoint[, second codepoint]).
 */
$dp = array();
foreach (explode("\n", $data) as $line) {
    /* try the two-code-point form first, then the plain one */
    if (preg_match('/^(#?[a-z0-9]+)\s+([a-f0-9]+) ([a-f0-9]+)/i', $line, $m)
            || preg_match('/^(#?[a-z0-9]+)\s+([a-f0-9]+)/i', $line, $m)) {
        $dp[] = array_slice($m, 1);
    }
}

/* keep the file order for the hash tables; $dp itself is ordered by the
 * numeric value of the first code point */
$origdp = $dp;

usort($dp, function ($left, $right) {
    return hexdec($left[1]) <=> hexdec($right[1]);
});

/* first pass: every first code point that starts a two-code-point entity
 * gets a (still empty) row */
$multicp_rows = array();
foreach ($dp as $el) {
    if (count($el) == 3) {
        $multicp_rows[$el[1]] = array();
    }
}

/* second pass: fill the rows. Entities with a second code point are keyed by
 * it; a single-code-point entity sharing the first code point is stored
 * under "default" */
foreach ($dp as $el) {
    if (!key_exists($el[1], $multicp_rows)) {
        continue;
    }
    $slot = (count($el) == 3) ? $el[2] : "default";
    $multicp_rows[$el[1]][$slot] = $el[0];
}
450
451if ($pass2 < 2)
452 echo "/* {{{ Start of $name multi-stage table for codepoint -> entity */", "\n\n";
453else
454 echo "/* {{{ Start of $name table for codepoint -> entity */", "\n\n";
455
456if (empty($multicp_rows))
457 goto skip_multicp;
458
460foreach ($multicp_rows as &$v) { ksort($v); }
461unset($v);
462
463echo
464"/* {{{ Start of double code point tables for $name */", "\n\n";
465
466foreach ($multicp_rows as $k => $v) {
467 echo "static const entity_multicodepoint_row multi_cp_{$ident}_",
468 sprintf("%05s", $k), "[] = {", "\n";
469 if (key_exists("default", $v)) {
470 if ($v['default'] == 'GT') /* hack to make > translate to &gt; not GT; */
471 $v['default'] = "gt";
472 echo "\t{ {", sprintf("\"%-21s", $v["default"].'",'),
473 "\t", sprintf("%02d", (count($v) - 1)), ",\t\t",
474 sprintf("% 2d", strlen($v["default"])), '} },', "\n";
475 } else {
476 echo "\t{ {", sprintf("%-22s", 'NULL,'),
477 "\t", sprintf("%02d", count($v)), ",\t\t0} },\n";
478 }
479 unset($v["default"]);
480 foreach ($v as $l => $w) {
481 echo "\t{ {", sprintf("\"%-21s", $w.'",'), "\t", sprintf("0x%05s", $l), ",\t",
482 sprintf("% 2d", strlen($w)), '} },', "\n";
483 }
484 echo "};\n";
486echo "\n/* End of double code point tables }}} */", "\n\n";
487
488skip_multicp:
489
490if ($pass2 < 2)
491 echo "/* {{{ Stage 3 Tables for $name */", "\n\n";
492
493$t = <<<CODE
494static const entity_stage3_row empty_stage3_table[] = {
495 /* 64 elements */
496 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
497 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
498 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
499 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
500 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
501 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
502 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
503 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
504 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
505 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
506 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
507 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
508 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
509 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
510 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
511 {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
512};
513
514CODE;
515
517 echo $t;
518
/*
 * Spread the entities over $mstable[stage1][stage2][stage3], the three
 * indexes being bits 12 and up, bits 6-11 and bits 0-5 of the first code
 * point. The stored value is the entity name, or "" when the code point has
 * a multi-code-point row in $multicp_rows.
 */
$mstable = array();
foreach ($dp as $el) {
    $cp = hexdec($el[1]);
    $s1 = ($cp & 0xFFF000) >> 12;
    $s2 = ($cp & 0xFC0) >> 6;
    $s3 = $cp & 0x3F;
    $mstable[$s1][$s2][$s3] = key_exists($el[1], $multicp_rows) ? "" : $el[0];
}

/*
 * Emit one 64-entry stage 3 table for every (stage1, stage2) pair that holds
 * at least one entity. Only stage 1 indexes below 0x1E are visited.
 */
for ($i = 0; $i < 0x1E; $i++) {
    for ($k = 0; $k < 64; $k++) {
        /* collect the 64 cells; $any3 tells whether one of them is in use */
        $col3 = array();
        $any3 = false;
        for ($l = 0; $l < 64; $l++) {
            $used = isset($mstable[$i][$k][$l]);
            $col3[$l] = $used ? $mstable[$i][$k][$l] : null;
            $any3 = $any3 || $used;
        }
        if (!$any3) {
            continue;
        }

        echo "static const entity_stage3_row stage3_table_{$ident}_",
            sprintf("%02X%03X", $i, $k << 6), "[] = {\n";

        foreach ($col3 as $y => $z) {
            /* four cells per output line */
            if ($y == 0) {
                echo "\t";
            } elseif ($y % 4 == 0) {
                echo "\n\t";
            } else {
                echo " ";
            }

            if ($z === NULL) {
                /* no entity for this code point */
                echo "{0, { {NULL, 0} } },";
            } elseif ($z === "QUOT") {
                /* hack to translate " into &quot;, not &QUOT; */
                echo "{0, { {\"quot\", 4} } },";
            } elseif ($z === "") {
                /* ambiguous: refer to the multi-code-point table */
                echo "{1, { {(void *)", sprintf("multi_cp_{$ident}_%05X",
                    ($i << 12) | ($k << 6) | $y ), ", 0} } },";
            } else {
                echo "{0, { {\"$z\", ", strlen($z), "} } },";
            }
        }
        echo "\n};\n\n";
    }
}
565
566if ($pass2 < 2)
567 echo "/* end of stage 3 Tables for $name }}} */", "\n\n";
568
569if ($pass2 > 1)
570 goto hashtables;
571
572echo
573"/* {{{ Stage 2 Tables for $name */", "\n\n";
574
575$t = <<<CODE
576static const entity_stage2_row empty_stage2_table[] = {
577 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
578 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
579 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
580 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
581 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
582 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
583 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
584 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
585 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
586 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
587 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
588 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
589 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
590 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
591 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
592 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
593};
594
595CODE;
597if (!$pass2)
598 echo $t;
599
/*
 * Emit one 64-entry stage 2 table for every stage 1 index (below 0x1E) that
 * has at least one stage 3 table. Slots without a stage 3 table point at
 * empty_stage3_table.
 */
for ($i = 0; $i < 0x1E; $i++) {
    /* stage 2 indexes in use under this stage 1 index */
    $populated = array();
    for ($k = 0; $k < 64; $k++) {
        if (isset($mstable[$i][$k])) {
            $populated[$k] = true;
        }
    }
    if ($populated === array()) {
        continue;
    }

    echo "static const entity_stage2_row stage2_table_{$ident}_",
        sprintf("%02X000", $i), "[] = {\n";
    for ($k = 0; $k < 64; $k++) {
        /* four pointers per output line */
        if ($k == 0) {
            echo "\t";
        } elseif ($k % 4 == 0) {
            echo "\n\t";
        } else {
            echo " ";
        }

        if (isset($populated[$k])) {
            echo sprintf("stage3_table_{$ident}_%05X", ($i << 12) | ($k << 6)), ",";
        } else {
            echo "empty_stage3_table", ",";
        }
    }
    echo "\n};\n\n";
}

echo "/* end of stage 2 tables for $name }}} */", "\n\n";

/* stage 1: one pointer per index below 0x1E, empty_stage2_table when the
 * index holds no entity at all */
echo "static const entity_stage1_row entity_ms_table_{$ident}[] = {\n";
for ($i = 0; $i < 0x1E; $i++) {
    if (!isset($mstable[$i])) {
        echo "\tempty_stage2_table,\n";
        continue;
    }
    echo "\t", sprintf("stage2_table_{$ident}_%02X000", $i), ",\n";
}
echo "};\n\n";

echo "/* end of $name multi-stage table for codepoint -> entity }}} */\n\n";
637
638/* commented-out; this enabled binary search, which turned out to be
639 * significantly slower than the hash tables for html 5 entities */
640//echo
641//"/* {{{ HTML 5 tables for entity -> codepoint */", "\n\n";
642
643//$t = <<<CODE
644//typedef struct {
645// const char *entity;
646// unsigned short entity_len;
647// unsigned int codepoint1;
648// unsigned int codepoint2;
649//} entity_cp_map;
650//
651//#define ENTITY_CP_MAP_CMP(l, lsize, r, rsize) \
652// ( ((lsize)==(rsize)) ? (memcmp((l), (r), (lsize))) : ((lsize)-(rsize)) )
653//
654//static const entity_cp_map html5_ent_cp_map[] = {
655//
656//CODE;
657//echo $t;
658//
659//$dp = $origdp;
660//usort($dp, function($a, $b) { $d = strlen($a[0])-strlen($b[0]);
661// return $d==0?strcmp($a[0], $b[0]):$d; });
662//
663//$k = 0;
664//foreach ($dp as $o) {
665// if ($k == 0) echo "\t";
666// elseif ($k % 3 == 0) echo "\n\t";
667// else echo " ";
668// if (isset($o[2]))
669// echo sprintf('{"%s", %d, 0x%X, 0x%X},', $o[0], strlen($o[0]),
670// hexdec($o[1]), hexdec($o[2]));
671// else
672// echo sprintf('{"%s", %d, 0x%X, 0},', $o[0], strlen($o[0]),
673// hexdec($o[1]));
674//
675// if (isset($o[2])) {
676// $entlen = strlen($o[0]) + 2;
677// $utf8len = strlen(
678// mb_convert_encoding("&#x{$o[1]};&#x{$o[2]};", "UTF-8", "HTML-ENTITIES"));
679// if ($utf8len > $entlen*1.2) {
680// die("violated assumption for traverse_for_entities");
681// }
682// }
683//
684// $k++;
685//}
686//echo "\n};\n\n";
688//echo "static const size_t html5_ent_cp_map_size = $k;\n\n";
689//
690//echo
691//"/* end of HTML 5 tables for entity -> codepoint }}} */\n\n";
692
693hashtables:
694
695echo
696"/* {{{ $name hash table for entity -> codepoint */", "\n\n";
697
698$t = <<<CODE
699typedef struct {
700 const char *entity;
701 unsigned short entity_len;
702 unsigned int codepoint1;
703 unsigned int codepoint2;
705
706typedef const entity_cp_map *entity_ht_bucket;
707
708typedef struct {
709 unsigned num_elems; /* power of 2 */
710 const entity_ht_bucket *buckets; /* .num_elems elements */
711} entity_ht;
712
713static const entity_cp_map ht_bucket_empty[] = { {NULL, 0, 0, 0} };
715CODE;
716
717if (!$pass2)
718 echo $t;
719
/**
 * Hash a string: start from 5381 and, for every byte, compute
 * hash * 33 + byte (written as (hash << 5) + hash + byte), keeping only the
 * low 32 bits after each step.
 *
 * The script uses the result, masked with the table size minus one, as the
 * bucket number of an entity name.
 *
 * @param string $str bytes to hash; the empty string yields 5381
 * @return int value in the range 0 .. 0xFFFFFFFF
 */
function hashfun(string $str): int
{
    $hash = 5381;
    $nKeyLength = strlen($str);

    for ($pos = 0; $pos < $nKeyLength; $pos++) {
        $hash = (int)(((int)(((int)($hash << 5)) + $hash)) + ord($str[$pos]))
            & 0xFFFFFFFF;
    }

    return $hash;
}
734
/*
 * Distribute the entities of the current pass over hash buckets.
 *
 * The number of buckets is the smallest power of two that is at least 1.5
 * times the number of entities, with a minimum of 16; being a power of two,
 * "hash & ($numelems - 1)" selects the bucket. The value is cast to int
 * because pow() yields a float here.
 */
$numelems = (int) max(pow(2, ceil(log(1.5*count($origdp))/log(2))), 16);
$mask = $numelems - 1;
$hashes = array();
foreach ($origdp as $e) {
    $hashes[hashfun($e[0]) & $mask][] = $e;
    if (isset($e[2])) {
        /* two-code-point entity: compare the length of "&name;" with the
         * UTF-8 length of the two characters it stands for. mb_chr() replaces
         * the "HTML-ENTITIES" pseudo-encoding of mb_convert_encoding(), which
         * is deprecated as of PHP 8.2. */
        $first = mb_chr(hexdec($e[1]), 'UTF-8');
        $second = mb_chr(hexdec($e[2]), 'UTF-8');
        if ($first === false || $second === false) {
            die("invalid code point for entity {$e[0]}");
        }
        $entlen = strlen($e[0]) + 2;
        $utf8len = strlen($first . $second);
        if ($utf8len > $entlen*1.2) {
            die("violated assumption for traverse_for_entities");
        }
    }
}
749
/*
 * Emit the entity -> code point hash table of the current pass.
 *
 * First, one array per non-empty bucket, named ht_bucket_<ident>_<bucket
 * number in hex>. Every element is {name, name length, first code point,
 * second code point or 0}; an all-zero {NULL, 0, 0, 0} element ends the array.
 */
750for ($i = 0; $i < $numelems; $i++) {
751 if (empty($hashes[$i]))
752 continue;
753 echo "static const entity_cp_map ht_bucket_{$ident}_", sprintf("%03X", $i) ,"[] = {";
754 foreach ($hashes[$i] as $h) {
755 if (isset($h[2])) {
756 echo sprintf(' {"%s", %d, 0x%05X, 0x%05X},',
757 $h[0], strlen($h[0]), hexdec($h[1]), hexdec($h[2]));
758 } else {
759 echo sprintf(' {"%s", %d, 0x%05X, 0},',
760 $h[0], strlen($h[0]), hexdec($h[1]));
761 }
762 }
763 echo " {NULL, 0, 0, 0} };\n";
764}
765echo "\n";
766
/* Then the bucket directory: $numelems pointers, four per output line.
 * Buckets without entries all point at ht_bucket_empty. */
767echo
768"static const entity_cp_map *const ht_buckets_{$ident}[] = {\n";
769
770for ($i = 0; $i < $numelems; $i++) {
771 if ($i == 0) echo "\t";
772 elseif ($i % 4 == 0) echo "\n\t";
773 else echo " ";
774 if (empty($hashes[$i]))
775 echo "ht_bucket_empty,";
776 else
777 echo "ht_bucket_{$ident}_", sprintf("%03X", $i), ",";
778}
779echo "\n};\n\n";
780
/* Finally the entity_ht record itself: number of buckets (in hex) and the
 * directory emitted above. */
781echo
782"static const entity_ht ent_ht_{$ident} = {
783 ", sprintf("0x%X", $numelems), ",
784 ht_buckets_{$ident}
785};\n\n";
786
787echo
788"/* end of $name hash table for entity -> codepoint }}} */\n\n";
789
/*
 * Select the next entity list, if any, and run the generator again from the
 * "again" label. Indexed by the pass that just finished (0 for the first
 * pass, during which $pass2 is false); each row is
 * array(file to load, new $pass2, new $name, new $ident).
 * After pass 3 nothing is left and the include guard is closed.
 */
$followingPass = array(
    0 => array("ents_html401.txt", 1, "HTML 4.01", "html4"),
    1 => array("ents_basic.txt", 2, "Basic entities (no apos)", "be_noapos"),
    2 => array("ents_basic_apos.txt", 3, "Basic entities (with apos)", "be_apos"),
);

$finishedPass = (int) $pass2;
if (isset($followingPass[$finishedPass])) {
    list($nextFile, $pass2, $name, $ident) = $followingPass[$finishedPass];
    $data = file_get_contents($nextFile);
    goto again;
}

echo "#endif /* HTML_TABLES_H */\n";
$t
Definition bench.php:386
$z
Definition addglob.php:3
hexdec(string $hex_string)
rtrim(string $string, string $characters=" \n\r\t\v\0")
array_keys(array $array, mixed $filter_value=UNKNOWN, bool $strict=false)
ord(string $character)
file_get_contents(string $filename, bool $use_include_path=false, $context=null, int $offset=0, ?int $length=null)
explode(string $separator, string $string, int $limit=PHP_INT_MAX)
array_map(?callable $callback, array $array, array ... $arrays)
usort(array &$array, callable $callback)
strtolower(string $string)
log(float $num, float $base=M_E)
count(Countable|array $value, int $mode=COUNT_NORMAL)
is_numeric(mixed $value)
pow(mixed $num, mixed $exponent)
ceil(int|float $num)
key_exists($key, array $array)
ksort(array &$array, int $flags=SORT_REGULAR)
for($i=0; $i< 0x100;++$i) $map[chr(0)]
#define max(a, b)
Definition exif.c:60
$data
Definition bench.php:6
new_type size
Definition ffi.c:4365
#define NULL
Definition gdcache.h:45
foreach($encodings as $e) $maxencnum
hashfun($str)
sprintf("0x%X", $numelems)
$prevStage2
foreach(explode("\n", $data) as $l) $origdp
if(! $pass2) elseif( $pass2==1) elseif( $pass2==2)
$multicp_rows
foreach($dp as $el) foreach( $dp as $el) if( $pass2< 2) echo ""
uint32_t codeset_len
Definition html_tables.h:38
entity_charset
Definition html_tables.h:27
@ cs_macroman
Definition html_tables.h:28
@ cs_big5hkscs
Definition html_tables.h:29
@ cs_8859_15
Definition html_tables.h:27
@ cs_sjis
Definition html_tables.h:29
@ cs_eucjp
Definition html_tables.h:29
@ cs_8859_5
Definition html_tables.h:28
@ cs_cp866
Definition html_tables.h:28
@ cs_numelems
Definition html_tables.h:30
@ cs_cp1252
Definition html_tables.h:27
@ cs_8859_1
Definition html_tables.h:27
@ cs_koi8r
Definition html_tables.h:28
@ cs_gb2312
Definition html_tables.h:29
@ cs_cp1251
Definition html_tables.h:27
@ cs_big5
Definition html_tables.h:28
@ cs_utf_8
Definition html_tables.h:27
const entity_stage3_row *const * entity_stage1_row
const char * codeset
Definition html_tables.h:37
const entity_stage3_row * entity_stage2_row
const entity_cp_map * entity_ht_bucket
enum entity_charset charset
Definition html_tables.h:39
$s2
Definition makestub.php:29
$s1
Definition makestub.php:28
$s3
Definition makestub.php:31
mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding=null)
preg_match(string $pattern, string $subject, &$matches=null, int $flags=0, int $offset=0)
zend_constant * data
unsigned short entity_len
const char * entity
strlen(string $string)
die(string|int $status=0)
else
Definition zend_ini.c:906
function(EX_VAR(opline->result.var))