/* Charset classification by position in the charset enumeration. The
 * enumeration is declared outside this excerpt, so the ordering these
 * comparisons rely on is assumed rather than shown here (TODO confirm).
 *   CHARSET_UNICODE_COMPAT  - cs compares at or below cs_8859_1
 *   CHARSET_SINGLE_BYTE     - cs lies strictly between cs_utf_8 and cs_big5
 *   CHARSET_PARTIAL_SUPPORT - cs compares at or above cs_big5
 * CHARSET_SINGLE_BYTE expands its argument twice, so pass an expression
 * without side effects. */
54#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1)
55#define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5)
56#define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5)
63 {
"ISO-8859-1",
sizeof(
"ISO-8859-1")-1,
cs_8859_1 },
64 {
"ISO8859-1",
sizeof(
"ISO8859-1")-1,
cs_8859_1 },
65 {
"ISO-8859-15",
sizeof(
"ISO-8859-15")-1,
cs_8859_15 },
66 {
"ISO8859-15",
sizeof(
"ISO8859-15")-1,
cs_8859_15 },
67 {
"utf-8",
sizeof(
"utf-8")-1,
cs_utf_8 },
68 {
"cp1252",
sizeof(
"cp1252")-1,
cs_cp1252 },
69 {
"Windows-1252",
sizeof(
"Windows-1252")-1,
cs_cp1252 },
71 {
"BIG5",
sizeof(
"BIG5")-1,
cs_big5 },
72 {
"950",
sizeof(
"950")-1,
cs_big5 },
73 {
"GB2312",
sizeof(
"GB2312")-1,
cs_gb2312 },
76 {
"Shift_JIS",
sizeof(
"Shift_JIS")-1,
cs_sjis },
77 {
"SJIS",
sizeof(
"SJIS")-1,
cs_sjis },
78 {
"932",
sizeof(
"932")-1,
cs_sjis },
79 {
"SJIS-win",
sizeof(
"SJIS-win")-1,
cs_sjis },
80 {
"CP932",
sizeof(
"CP932")-1,
cs_sjis },
81 {
"EUCJP",
sizeof(
"EUCJP")-1,
cs_eucjp },
82 {
"EUC-JP",
sizeof(
"EUC-JP")-1,
cs_eucjp },
83 {
"eucJP-win",
sizeof(
"eucJP-win")-1,
cs_eucjp },
84 {
"KOI8-R",
sizeof(
"KOI8-R")-1,
cs_koi8r },
85 {
"koi8-ru",
sizeof(
"koi8-ru")-1,
cs_koi8r },
86 {
"koi8r",
sizeof(
"koi8r")-1,
cs_koi8r },
87 {
"cp1251",
sizeof(
"cp1251")-1,
cs_cp1251 },
88 {
"Windows-1251",
sizeof(
"Windows-1251")-1,
cs_cp1251 },
89 {
"win-1251",
sizeof(
"win-1251")-1,
cs_cp1251 },
90 {
"iso8859-5",
sizeof(
"iso8859-5")-1,
cs_8859_5 },
91 {
"iso-8859-5",
sizeof(
"iso-8859-5")-1,
cs_8859_5 },
92 {
"cp866",
sizeof(
"cp866")-1,
cs_cp866 },
93 {
"866",
sizeof(
"866")-1,
cs_cp866 },
94 {
"ibm866",
sizeof(
"ibm866")-1,
cs_cp866 },
99#define LONGEST_ENTITY_LENGTH 31
105 unsigned short uni_cp[64];
/* Split a single-byte code k into the two indices of the encoding-to-Unicode
 * tables: STAGE1 is bits 7..6 (0..3, one of four stage-2 tables) and STAGE2
 * is bits 5..0 (0..63, the slot inside that table).
 * The argument is fully parenthesized in both macros so that an expression
 * argument such as (a | b) is masked as a whole. */
#define ENT_ENC_TO_UNI_STAGE1(k) (((k) & 0xC0) >> 6)
#define ENT_ENC_TO_UNI_STAGE2(k) ((k) & 0x3F)
124 "ident" =>
"iso88591",
126 "name" =>
"ISO-8859-1",
127 "file" =>
"mappings/8859-1.TXT",
130 "ident" =>
"iso88595",
132 "name" =>
"ISO-8859-5",
133 "file" =>
"mappings/8859-5.TXT",
136 "ident" =>
"iso885915",
138 "name" =>
"ISO-8859-15",
139 "file" =>
"mappings/8859-15.TXT",
142 "ident" =>
"win1252",
144 "enumident" =>
"cp1252",
145 "name" =>
"Windows-1252",
146 "file" =>
"mappings/CP1252.TXT",
149 "ident" =>
"win1251",
// Windows-1251 belongs to the cp1251 charset (its aliases map to cs_cp1251);
// the previous value "cp1252" was copied from the Windows-1252 entry.
"enumident" =>
"cp1251",
152 "name" =>
"Windows-1251",
153 "file" =>
"mappings/CP1251.TXT",
159 "file" =>
"mappings/KOI8-R.TXT",
165 "file" =>
"mappings/CP866.TXT",
168 "ident" =>
"macroman",
170 "name" =>
"MacRoman",
171 "file" =>
"mappings/ROMAN.TXT",
179"/* {{{ Mappings *to* Unicode for {$e['name']} */\n\n";
184 foreach ($lines as $l) {
185 if (
preg_match(
"/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})/i", $l, $matches))
186 $map[] = array($matches[1], $matches[2]);
192 $mstable = array(
"ident" => $e[
'ident']);
194 for ($i = 0; $i < 4; $i++) {
195 for ($j = 0; $j < 64; $j++) {
197 $mstable[$i][$j] = isset($mappy[$cp]) ? $mappy[$cp] :
NULL;
202"/* {{{ Stage 2 tables for {$e['name']} */\n\n";
204 $s2tables_idents = array();
205 for ($i = 0; $i < 4; $i++) {
211 $s2tables_idents[$i] = $e[
"ident"];
213 echo
"static const enc_to_uni_stage2 enc_to_uni_s2_{$e['ident']}_".
214 sprintf(
"%02X", $i << 6).
" = { {\n";
215 for ($j = 0; $j < 64; $j++) {
216 if ($j == 0) echo
"\t";
217 elseif ($j % 6 == 0) echo
"\n\t";
230"/* end of stage 2 tables for {$e['name']} }}} */\n\n";
233"/* {{{ Stage 1 table for {$e['name']} */\n";
236"static const enc_to_uni enc_to_uni_{$e['ident']} = { {
237\t&enc_to_uni_s2_{$s2tables_idents[0]}_00,
238\t&enc_to_uni_s2_{$s2tables_idents[1]}_40,
239\t&enc_to_uni_s2_{$s2tables_idents[2]}_80,
240\t&enc_to_uni_s2_{$s2tables_idents[3]}_C0 }
245"/* end of stage 1 table for {$e['name']} }}} */\n\n";
253"/* {{{ Index of tables for encoding conversion */
254static const enc_to_uni *const enc_to_uni_index[cs_numelems] = {\n";
256foreach (
$a as $k => $v) {
260 echo
"\t&enc_to_uni_$v,\n";
272 unsigned short un_code_point;
273 unsigned char cs_code;
283 "ident" =>
"iso885915",
284 "name" =>
"ISO-8859-15",
285 "file" =>
"mappings/8859-15.TXT",
286 "range" => array(0xA4, 0xBE),
289 "ident" =>
"win1252",
290 "name" =>
"Windows-1252",
291 "file" =>
"mappings/CP1252.TXT",
292 "range" => array(0x80, 0x9F),
295 "ident" =>
"win1251",
296 "name" =>
"Windows-1251",
297 "file" =>
"mappings/CP1251.TXT",
298 "range" => array(0x80, 0xFF),
303 "file" =>
"mappings/KOI8-R.TXT",
304 "range" => array(0x80, 0xFF),
309 "file" =>
"mappings/CP866.TXT",
310 "range" => array(0x80, 0xFF),
313 "ident" =>
"macroman",
314 "name" =>
"MacRoman",
315 "file" =>
"mappings/ROMAN.TXT",
316 "range" => array(0x80, 0xFF),
322"/* {{{ Mappings *from* Unicode for {$e['name']} */\n";
327 foreach ($lines as $l) {
328 if (
preg_match(
"/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches))
329 $map[] = array($matches[1], $matches[2],
rtrim($matches[3]));
333 foreach (
$map as $v) {
334 if (
hexdec($v[0]) >= $e[
'range'][0] &&
hexdec($v[0]) <= $e[
'range'][1])
340"static const uni_to_enc unimap_{$e['ident']}[] = {\n";
342 foreach ($mappy as $k => $v) {
343 echo
"\t{ ",
sprintf(
"0x%04X", $k),
", ",
sprintf(
"0x%02X", $v[0]),
" },\t/* ",
// Closing marker of the "Mappings *from* Unicode" section: it must end the
// fold (three closing braces) like the other end-of-section markers do,
// instead of opening a second one.
"/* end of mappings *from* Unicode for {$e['name']} }}} */\n\n";
/* Decompose a code point k into three table indices and put it back together:
 *   ENT_STAGE1_INDEX - bits 23..12 of k
 *   ENT_STAGE2_INDEX - bits 11..6 of k (0..63)
 *   ENT_STAGE3_INDEX - bits 5..0 of k (0..63)
 * ENT_CODE_POINT_FROM_STAGES(i,j,k) is the inverse: it ORs the three indices
 * back into their bit positions. */
364#define ENT_STAGE1_INDEX(k) (((k) & 0xFFF000) >> 12)
365#define ENT_STAGE2_INDEX(k) (((k) & 0xFC0) >> 6)
366#define ENT_STAGE3_INDEX(k) ((k) & 0x3F)
367#define ENT_CODE_POINT_FROM_STAGES(i,j,k) (((i) << 12) | ((j) << 6) | (k))
373 const char *default_entity;
375 unsigned short default_entity_len;
380 unsigned short entity_len;
423 if (
preg_match(
'/^(#?[a-z0-9]+)\s+([a-f0-9]+) ([a-f0-9]+)/i', $l, $matches)) {
425 $dp[] = array($matches[1], $matches[2], $matches[3]);
426 }
else if (
preg_match(
'/^(#?[a-z0-9]+)\s+([a-f0-9]+)/i', $l, $matches)) {
427 $dp[] = array($matches[1], $matches[2]);
436foreach (
$dp as $el) {
437 if (
count($el) == 3) {
442foreach (
$dp as $el) {
452 echo
"/* {{{ Start of $name multi-stage table for codepoint -> entity */",
"\n\n";
454 echo
"/* {{{ Start of $name table for codepoint -> entity */",
"\n\n";
464"/* {{{ Start of double code point tables for $name */",
"\n\n";
467 echo
"static const entity_multicodepoint_row multi_cp_{$ident}_",
468 sprintf(
"%05s", $k),
"[] = {",
"\n";
470 if ($v[
'default'] ==
'GT')
471 $v[
'default'] =
"gt";
472 echo
"\t{ {",
sprintf(
"\"%-21s", $v[
"default"].
'",'),
476 echo
"\t{ {",
sprintf(
"%-22s",
'NULL,'),
479 unset($v[
"default"]);
480 foreach ($v as $l =>
$w) {
481 echo
"\t{ {",
sprintf(
"\"%-21s",
$w.
'",'),
"\t",
sprintf(
"0x%05s", $l),
",\t",
486echo
"\n/* End of double code point tables }}} */",
"\n\n";
491 echo
"/* {{{ Stage 3 Tables for $name */",
"\n\n";
496 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
497 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
498 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
499 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
500 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
501 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
502 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
503 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
504 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
505 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
506 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
507 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
508 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
509 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
510 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
511 {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } }, {0, { {
NULL, 0} } },
520foreach (
$dp as $el) {
531for ($i = 0; $i < 0x1E; $i++) {
532 for ($k = 0; $k < 64; $k++) {
535 for ($l = 0; $l < 64; $l++) {
544 echo
"static const entity_stage3_row stage3_table_{$ident}_",
545 sprintf(
"%02X%03X", $i, $k << 6),
"[] = {\n";
546 foreach ($col3 as $y =>
$z) {
547 if ($y == 0) echo
"\t";
548 elseif ($y % 4 == 0) echo
"\n\t";
551 echo
"{0, { {NULL, 0} } },";
553 echo
"{0, { {\"quot\", 4} } },";
555 echo
"{0, { {\"$z\", ",
strlen(
$z),
"} } },";
557 echo
"{1, { {(void *)",
sprintf(
"multi_cp_{$ident}_%05X",
558 ($i << 12) | ($k << 6) | $y ),
", 0} } },";
567 echo
"/* end of stage 3 Tables for $name }}} */",
"\n\n";
573"/* {{{ Stage 2 Tables for $name */",
"\n\n";
577 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
578 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
579 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
580 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
581 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
582 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
583 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
584 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
585 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
586 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
587 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
588 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
589 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
590 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
591 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
592 empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
600for ($i = 0; $i < 0x1E; $i++) {
602 for ($k = 0; $k < 64; $k++) {
607 echo
"static const entity_stage2_row stage2_table_{$ident}_",
608 sprintf(
"%02X000", $i),
"[] = {\n";
609 for ($k = 0; $k < 64; $k++) {
610 if ($k == 0) echo
"\t";
611 elseif ($k % 4 == 0) echo
"\n\t";
614 echo
sprintf(
"stage3_table_{$ident}_%05X", ($i << 12) | ($k << 6)),
",";
616 echo
"empty_stage3_table",
",";
624"/* end of stage 2 tables for $name }}} */",
"\n\n";
626echo
"static const entity_stage1_row entity_ms_table_{$ident}[] = {\n";
627for ($i = 0; $i < 0x1E; $i++) {
629 echo
"\t",
sprintf(
"stage2_table_{$ident}_%02X000", $i),
",\n";
631 echo
"\tempty_stage2_table,\n";
636"/* end of $name multi-stage table for codepoint -> entity }}} */\n\n";
696"/* {{{ $name hash table for entity -> codepoint */",
"\n\n";
701 unsigned short entity_len;
702 unsigned int codepoint1;
703 unsigned int codepoint2;
724 $nKeyLength =
strlen($str);
727 for (; $nKeyLength > 0; $nKeyLength--) {
728 $hash = (int)(((
int)(((
int)($hash << 5)) + $hash)) +
ord($str[$pos++]))
741 $entlen =
strlen($e[0]) + 2;
744 if ($utf8len > $entlen*1.2) {
745 die(
"violated assumption for traverse_for_entities");
753 echo
"static const entity_cp_map ht_bucket_{$ident}_",
sprintf(
"%03X", $i) ,
"[] = {";
756 echo
sprintf(
' {"%s", %d, 0x%05X, 0x%05X},',
759 echo
sprintf(
' {"%s", %d, 0x%05X, 0},',
763 echo
" {NULL, 0, 0, 0} };\n";
768"static const entity_cp_map *const ht_buckets_{$ident}[] = {\n";
771 if ($i == 0) echo
"\t";
772 elseif ($i % 4 == 0) echo
"\n\t";
775 echo
"ht_bucket_empty,";
777 echo
"ht_bucket_{$ident}_",
sprintf(
"%03X", $i),
",";
782"static const entity_ht ent_ht_{$ident} = {
788"/* end of $name hash table for entity -> codepoint }}} */\n\n";
799 $name =
"Basic entities (no apos)";
805 $name =
"Basic entities (with apos)";
810echo
"#endif /* HTML_TABLES_H */\n";
hexdec(string $hex_string)
rtrim(string $string, string $characters=" \n\r\t\v\0")
array_keys(array $array, mixed $filter_value=UNKNOWN, bool $strict=false)
file_get_contents(string $filename, bool $use_include_path=false, $context=null, int $offset=0, ?int $length=null)
explode(string $separator, string $string, int $limit=PHP_INT_MAX)
array_map(?callable $callback, array $array, array ... $arrays)
usort(array &$array, callable $callback)
strtolower(string $string)
log(float $num, float $base=M_E)
count(Countable|array $value, int $mode=COUNT_NORMAL)
pow(mixed $num, mixed $exponent)
key_exists($key, array $array)
ksort(array &$array, int $flags=SORT_REGULAR)
for($i=0; $i< 0x100;++$i) $map[chr(0)]
foreach($encodings as $e) $maxencnum
sprintf("0x%X", $numelems)
foreach(explode("\n", $data) as $l) $origdp
if(! $pass2) elseif( $pass2==1) elseif( $pass2==2)
foreach($dp as $el) foreach( $dp as $el) if( $pass2< 2) echo ""
const entity_stage3_row *const * entity_stage1_row
const entity_stage3_row * entity_stage2_row
const entity_cp_map * entity_ht_bucket
enum entity_charset charset
mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding=null)
preg_match(string $pattern, string $subject, &$matches=null, int $flags=0, int $offset=0)
unsigned short entity_len
die(string|int $status=0)
function(EX_VAR(opline->result.var))