39#include "config.w32.h"
41#include <php_config.h>
52#define LIMIT_ALL(all, doctype, charset) do { \
53 (all) = (all) && !CHARSET_PARTIAL_SUPPORT((charset)) && ((doctype) != ENT_HTML_DOC_XML1); \
56#define MB_FAILURE(pos, advance) do { \
57 *cursor = pos + (advance); \
62#define CHECK_LEN(pos, chars_need) ((str_len - (pos)) >= (chars_need))
65#define utf8_lead(c) ((c) < 0x80 || ((c) >= 0xC2 && (c) <= 0xF4))
69#define utf8_trail(c) ((c) >= 0x80 && (c) <= 0xBF)
71#define gb2312_lead(c) ((c) != 0x8E && (c) != 0x8F && (c) != 0xA0 && (c) != 0xFF)
72#define gb2312_trail(c) ((c) >= 0xA1 && (c) <= 0xFE)
74#define sjis_lead(c) ((c) != 0x80 && (c) != 0xA0 && (c) < 0xFD)
75#define sjis_trail(c) ((c) >= 0x40 && (c) != 0x7F && (c) < 0xFD)
78static char *get_default_charset(
void) {
81 }
else if (
SG(default_charset) &&
SG(default_charset)[0] ) {
82 return SG(default_charset);
89static inline unsigned int get_next_char(
91 const unsigned char *str,
97 unsigned int this_char = 0;
117 }
else if (c < 0xc2) {
119 }
else if (c < 0xe0) {
126 this_char = ((c & 0x1f) << 6) | (str[
pos + 1] & 0x3f);
127 if (this_char < 0x80) {
131 }
else if (c < 0xf0) {
132 size_t avail = str_len -
pos;
144 this_char = ((c & 0x0f) << 12) | ((str[
pos + 1] & 0x3f) << 6) | (str[
pos + 2] & 0x3f);
145 if (this_char < 0x800) {
147 }
else if (this_char >= 0xd800 && this_char <= 0xdfff) {
151 }
else if (c < 0xf5) {
152 size_t avail = str_len -
pos;
167 this_char = ((c & 0x07) << 18) | ((str[
pos + 1] & 0x3f) << 12) | ((str[
pos + 2] & 0x3f) << 6) | (str[
pos + 3] & 0x3f);
168 if (this_char < 0x10000 || this_char > 0x10FFFF) {
181 unsigned char c = str[
pos];
182 if (c >= 0x81 && c <= 0xFE) {
189 if ((
next >= 0x40 &&
next <= 0x7E) ||
191 this_char = (c << 8) |
next;
205 unsigned char c = str[
pos];
206 if (c >= 0x81 && c <= 0xFE) {
213 if ((
next >= 0x40 &&
next <= 0x7E) ||
215 this_char = (c << 8) |
next;
216 }
else if (
next != 0x80 &&
next != 0xFF) {
231 unsigned char c = str[
pos];
232 if (c >= 0xA1 && c <= 0xFE) {
240 this_char = (c << 8) |
next;
258 unsigned char c = str[
pos];
259 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xFC)) {
267 this_char = (c << 8) |
next;
274 }
else if (c < 0x80 || (c >= 0xA1 && c <= 0xDF)) {
285 unsigned char c = str[
pos];
287 if (c >= 0xA1 && c <= 0xFE) {
295 this_char = (c << 8) |
next;
300 }
else if (c == 0x8E) {
308 this_char = (c << 8) |
next;
313 }
else if (c == 0x8F) {
314 size_t avail = str_len -
pos;
316 if (avail < 3 || !(str[
pos + 1] >= 0xA1 && str[
pos + 1] <= 0xFE) ||
317 !(str[
pos + 2] >= 0xA1 && str[
pos + 2] <= 0xFE)) {
318 if (avail < 2 || (str[
pos + 1] != 0xA0 && str[
pos + 1] != 0xFF))
320 else if (avail < 3 || (str[
pos + 2] != 0xA0 && str[
pos + 2] != 0xFF))
326 this_char = (c << 16) | (str[
pos + 1] << 8) | str[
pos + 2];
329 }
else if (c != 0xA0 && c != 0xFF) {
340 this_char = str[
pos++];
352 const unsigned char *str,
364static enum entity_charset determine_charset(
const char *charset_hint,
bool quiet)
366 if (!charset_hint || !*charset_hint) {
367 charset_hint = get_default_charset();
370 if (charset_hint && *charset_hint) {
373 for (
size_t i = 0; i <
sizeof(charset_map)/
sizeof(charset_map[0]); i++) {
376 return charset_map[i].charset;
391static inline size_t php_utf32_utf8(
unsigned char *
buf,
unsigned k)
400 }
else if (k < 0x800) {
401 buf[0] = 0xc0 | (k >> 6);
402 buf[1] = 0x80 | (k & 0x3f);
404 }
else if (k < 0x10000) {
405 buf[0] = 0xe0 | (k >> 12);
406 buf[1] = 0x80 | ((k >> 6) & 0x3f);
407 buf[2] = 0x80 | (k & 0x3f);
410 buf[0] = 0xf0 | (k >> 18);
411 buf[1] = 0x80 | ((k >> 12) & 0x3f);
412 buf[2] = 0x80 | ((k >> 6) & 0x3f);
413 buf[3] = 0x80 | (k & 0x3f);
427static inline unsigned char unimap_bsearch(
const uni_to_enc *table,
unsigned code_key_a,
size_t num)
432 unsigned short code_key;
435 if (code_key_a > 0xFFFFU)
438 code_key = (
unsigned short) code_key_a;
442 if (code_key < m->un_code_point)
444 else if (code_key > m->un_code_point)
470 if (code <= 0xA0 || code == 0xAD ) {
472 }
else if (code == 0x2116) {
474 }
else if (code == 0xA7) {
476 }
else if (code >= 0x0401 && code <= 0x045F) {
477 if (code == 0x040D || code == 0x0450 || code == 0x045D)
486 if (code < 0xA4 || (code > 0xBE && code <= 0xFF)) {
489 found = unimap_bsearch(unimap_iso885915,
490 code,
sizeof(unimap_iso885915) /
sizeof(*unimap_iso885915));
499 if (code <= 0x7F || (code >= 0xA0 && code <= 0xFF)) {
502 found = unimap_bsearch(unimap_win1252,
503 code,
sizeof(unimap_win1252) /
sizeof(*unimap_win1252));
514 table = unimap_macroman;
515 table_size =
sizeof(unimap_macroman) /
sizeof(*unimap_macroman);
518 table = unimap_win1251;
519 table_size =
sizeof(unimap_win1251) /
sizeof(*unimap_win1251);
522 table = unimap_koi8r;
523 table_size =
sizeof(unimap_koi8r) /
sizeof(*unimap_koi8r);
526 table = unimap_cp866;
527 table_size =
sizeof(unimap_cp866) /
sizeof(*unimap_cp866);
533 found = unimap_bsearch(table, code, table_size);
548 if (code >= 0x20 && code <= 0x7D) {
560 if (code >= 0x20 && code <= 0x7D) {
576static inline void map_to_unicode(
unsigned code,
const enc_to_uni *table,
unsigned *
res)
584static inline int unicode_cp_is_allowed(
unsigned uni_cp,
int document_type)
610 switch (document_type) {
612 return (uni_cp >= 0x20 && uni_cp <= 0x7E) ||
613 (uni_cp == 0x0A || uni_cp == 0x09 || uni_cp == 0x0D) ||
614 (uni_cp >= 0xA0 && uni_cp <= 0xD7FF) ||
615 (uni_cp >= 0xE000 && uni_cp <= 0x10FFFF);
617 return (uni_cp >= 0x20 && uni_cp <= 0x7E) ||
618 (uni_cp >= 0x09 && uni_cp <= 0x0D && uni_cp != 0x0B) ||
619 (uni_cp >= 0xA0 && uni_cp <= 0xD7FF) ||
620 (uni_cp >= 0xE000 && uni_cp <= 0x10FFFF &&
621 ((uni_cp & 0xFFFF) < 0xFFFE) &&
622 (uni_cp < 0xFDD0 || uni_cp > 0xFDEF));
625 return (uni_cp >= 0x20 && uni_cp <= 0xD7FF) ||
626 (uni_cp == 0x0A || uni_cp == 0x09 || uni_cp == 0x0D) ||
627 (uni_cp >= 0xE000 && uni_cp <= 0x10FFFF && uni_cp != 0xFFFE && uni_cp != 0xFFFF);
635static inline int numeric_entity_is_allowed(
unsigned uni_cp,
int document_type)
638 switch (document_type) {
642 return uni_cp <= 0x10FFFF;
649 return (uni_cp >= 0x20 && uni_cp <= 0x7E) ||
650 (uni_cp >= 0x09 && uni_cp <= 0x0C && uni_cp != 0x0B) ||
651 (uni_cp >= 0xA0 && uni_cp <= 0x10FFFF &&
652 ((uni_cp & 0xFFFF) < 0xFFFE) &&
653 (uni_cp < 0xFDD0 || uni_cp > 0xFDEF));
658 return unicode_cp_is_allowed(uni_cp, document_type);
670static inline int process_numeric_entity(
const char **
buf,
unsigned *code_point)
673 int hexadecimal = (**
buf ==
'x' || **
buf ==
'X');
681 if ((hexadecimal && !isxdigit(**
buf)) ||
682 (!hexadecimal && !isdigit(**
buf))) {
695 if (code_l >
Z_L(0x10FFFF))
698 if (code_point !=
NULL)
699 *code_point = (unsigned)code_l;
706static inline int process_named_entity_html(
const char **
buf,
const char **
start,
size_t *length)
715 while ((**
buf >=
'a' && **
buf <=
'z') ||
716 (**
buf >=
'A' && **
buf <=
'Z') ||
717 (**
buf >=
'0' && **
buf <=
'9')) {
735static int resolve_named_entity_html(
const char *
start,
size_t length,
const entity_ht *
ht,
unsigned *uni_cp1,
unsigned *uni_cp2)
742 if (
s->entity_len == length) {
743 if (memcmp(
start,
s->entity, length) == 0) {
744 *uni_cp1 =
s->codepoint1;
745 *uni_cp2 =
s->codepoint2;
759 return php_utf32_utf8(
buf, code);
781 return php_mb2_int_to_char(
buf, code);
792 return php_mb2_int_to_char(
buf, code);
814#define TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(oldlen) ((oldlen) + (oldlen) / 5 + 2)
815static void traverse_for_entities(
833 unsigned code, code2 = 0;
842 if (
p[0] !=
'&' || (
p + 3 >= lim)) {
852 if (process_numeric_entity(&
next, &code) ==
FAILURE)
857 if (!all && (code > 63U ||
858 stage3_table_be_apos_00000[code].
data.ent.entity ==
NULL))
865 if (!unicode_cp_is_allowed(code, doctype) ||
878 if (resolve_named_entity_html(
start, ent_len, inv_map, &code, &code2) ==
FAILURE) {
883 code = (unsigned)
'\'';
901 if (map_from_unicode(code,
charset, &code) ==
FAILURE || code2 != 0)
905 q += write_octet_sequence((
unsigned char*)q,
charset, code);
907 q += write_octet_sequence((
unsigned char*)q,
charset, code2);
931 switch (document_type) {
934 return &ent_ht_html4;
936 return &ent_ht_html5;
938 return &ent_ht_be_apos;
941 switch (document_type) {
943 return &ent_ht_be_noapos;
945 return &ent_ht_be_apos;
962 entity_ms_table_html5 : entity_ms_table_html4;
965 stage3_table_be_noapos_00000 : stage3_table_be_apos_00000;
984 return zend_string_copy(str);
988 charset = determine_charset(hint_charset, 0);
998 return zend_string_copy(str);
1001 ret = zend_string_alloc(new_size, 0);
1003 inverse_map = unescape_inverse_map(all,
flags);
1018static inline void find_entity_for_char(
1022 const unsigned char **entity,
1024 const unsigned char *old,
1031 if (stage1_idx > 0x1D) {
1044 size_t cursor_before = *cursor;
1048 if (!(*cursor < oldlen))
1049 goto no_suitable_2nd;
1051 next_char = get_next_char(
charset, old, oldlen, cursor, &
status);
1054 goto no_suitable_2nd;
1063 for ( ;
s <= e;
s++) {
1064 if (
s->normal_entry.second_cp == next_char) {
1065 *entity = (
const unsigned char *)
s->normal_entry.entity;
1066 *entity_len =
s->normal_entry.entity_len;
1072 *cursor = cursor_before;
1073 *entity = (
const unsigned char *)
1081static inline void find_entity_for_char_basic(
1084 const unsigned char **entity,
1093 *entity = (
const unsigned char *) table[k].
data.ent.entity;
1094 *entity_len = table[k].data.ent.entity_len;
1109 const unsigned char *replacement =
NULL;
1110 size_t replacement_len = 0;
1115 "substitution is supported for multi-byte encodings other than UTF-8; "
1116 "functionality is equivalent to htmlspecialchars");
1120 entity_table = determine_entity_table(all, doctype);
1122 to_uni_table = enc_to_uni_index[
charset];
1125 if (!double_encode) {
1128 inv_map = unescape_inverse_map(1,
flags);
1133 replacement = (
const unsigned char*)
"\xEF\xBF\xBD";
1134 replacement_len =
sizeof(
"\xEF\xBF\xBD") - 1;
1136 replacement = (
const unsigned char*)
"�";
1137 replacement_len =
sizeof(
"�") - 1;
1145 maxlen = zend_safe_addmult(oldlen, 2, 0,
"html_entities");
1148 replaced = zend_string_alloc(
maxlen, 0);
1151 while (cursor < oldlen) {
1152 const unsigned char *mbsequence =
NULL;
1153 size_t mbseqlen = 0,
1154 cursor_before = cursor;
1156 unsigned int this_char = get_next_char(
charset, old, oldlen, &cursor, &
status);
1161 replaced = zend_string_safe_realloc(replaced,
maxlen, 1, 128, 0);
1171 len += replacement_len;
1174 zend_string_efree(replaced);
1178 mbsequence = &old[cursor_before];
1179 mbseqlen = cursor - cursor_before;
1182 if (this_char !=
'&') {
1183 const unsigned char *rep =
NULL;
1188 goto pass_char_through;
1191 if (to_uni_table !=
NULL) {
1195 map_to_unicode(this_char, to_uni_table, &this_char);
1196 if (this_char == 0xFFFF)
1197 goto pass_char_through;
1201 &rep_len, old, oldlen, &cursor);
1203 find_entity_for_char_basic(this_char, entity_table.
table, &rep, &rep_len);
1216 if (!unicode_cp_is_allowed(this_char, doctype)) {
1217 mbsequence = replacement;
1218 mbseqlen = replacement_len;
1220 }
else if (to_uni_table) {
1222 map_to_unicode(this_char, to_uni_table, &this_char);
1223 if (!unicode_cp_is_allowed(this_char, doctype)) {
1224 mbsequence = replacement;
1225 mbseqlen = replacement_len;
1235 if (this_char <= 0x7D &&
1236 !unicode_cp_is_allowed(this_char, doctype)) {
1237 mbsequence = replacement;
1238 mbseqlen = replacement_len;
1251 if (double_encode) {
1254 len +=
sizeof(
"&") - 1;
1259 if (old[cursor] ==
'#') {
1260 unsigned code_point;
1262 char *
pos = (
char*)&old[cursor+1];
1263 valid = process_numeric_entity((
const char **)&
pos, &code_point);
1267 if (!numeric_entity_is_allowed(code_point, doctype))
1270 ent_len =
pos - (
char*)&old[cursor];
1273 const char *
start = (
const char *) &old[cursor],
1275 unsigned dummy1, dummy2;
1279 if (resolve_named_entity_html(
start, ent_len, inv_map, &dummy1, &dummy2) ==
FAILURE) {
1293 replaced = zend_string_safe_realloc(replaced,
maxlen, 1, ent_len + 128, 0);
1300 cursor += ent_len + 1;
1317 bool double_encode = 1;
1329 hint_charset ?
ZSTR_VAL(hint_charset) :
NULL, double_encode, 0);
1374 str, 1 , (
int)quote_style, hint_charset ?
ZSTR_VAL(hint_charset) :
NULL);
1388static inline void write_s3row_data(
1403 entity[l + 1] =
';';
1410 if (mcpr[0].leading_entry.default_entity !=
NULL) {
1412 memcpy(&entity[1], mcpr[0].leading_entry.default_entity, l);
1413 entity[l + 1] =
';';
1417 for (i = 1; i <= num_entries; i++) {
1433 written_k2 = write_octet_sequence((
unsigned char*)&
key[written_k1],
charset, spe_cp);
1434 memcpy(&entity[1], mcpr[i].normal_entry.entity, l);
1435 entity[l + 1] =
';';
1450 char *charset_hint =
NULL;
1451 size_t charset_hint_len;
1465 charset = determine_charset(charset_hint, 0);
1471 entity_table = determine_entity_table((
int)all, doctype);
1473 to_uni_table = enc_to_uni_index[
charset];
1481 max_i, max_j, max_k;
1484 max_i = 1; max_j = 4; max_k = 64;
1486 max_i = 0x1E; max_j = 64; max_k = 64;
1489 for (i = 0; i < max_i; i++) {
1490 if (ms_table[i] == empty_stage2_table)
1492 for (
j = 0;
j < max_j;
j++) {
1493 if (ms_table[i][
j] == empty_stage3_table)
1495 for (k = 0; k < max_k; k++) {
1514 for (i = 0; i <= 0xFF; i++) {
1523 map_to_unicode(i, to_uni_table, &uni_cp);
1534 numelems =
sizeof(stage3_table_be_noapos_00000) /
1535 sizeof(*stage3_table_be_noapos_00000);
1537 for (
j = 0;
j < numelems;
j++) {
htmlspecialchars_decode(string $string, int $flags=ENT_QUOTES|ENT_SUBSTITUTE|ENT_HTML401)
get_html_translation_table(int $table=HTML_SPECIALCHARS, int $flags=ENT_QUOTES|ENT_SUBSTITUTE|ENT_HTML401, string $encoding="UTF-8")
htmlentities(string $string, int $flags=ENT_QUOTES|ENT_SUBSTITUTE|ENT_HTML401, ?string $encoding=null, bool $double_encode=true)
htmlspecialchars(string $string, int $flags=ENT_QUOTES|ENT_SUBSTITUTE|ENT_HTML401, ?string $encoding=null, bool $double_encode=true)
assert(mixed $assertion, Throwable|string|null $description=null)
html_entity_decode(string $string, int $flags=ENT_QUOTES|ENT_SUBSTITUTE|ENT_HTML401, ?string $encoding=null)
ZEND_TLS const struct php_win32_cp * orig_cp
zend_ffi_ctype_name_buf buf
hash(string $algo, string $data, bool $binary=false, array $options=[])
PHPAPI zend_string * php_unescape_html_entities(zend_string *str, int all, int flags, const char *hint_charset)
#define MB_FAILURE(pos, advance)
PHPAPI zend_string * php_escape_html_entities(const unsigned char *old, size_t oldlen, int all, int flags, const char *hint_charset)
#define CHECK_LEN(pos, chars_need)
#define LIMIT_ALL(all, doctype, charset)
#define TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(oldlen)
PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, zend_result *status)
PHPAPI zend_string * php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, const char *hint_charset, bool double_encode, bool quiet)
#define ENT_STAGE1_INDEX(k)
#define ENT_CODE_POINT_FROM_STAGES(i, j, k)
#define CHARSET_UNICODE_COMPAT(cs)
#define LONGEST_ENTITY_LENGTH
const entity_stage3_row *const * entity_stage1_row
#define ENT_STAGE3_INDEX(k)
#define CHARSET_SINGLE_BYTE(cs)
#define ENT_ENC_TO_UNI_STAGE1(k)
#define CHARSET_PARTIAL_SUPPORT(cs)
#define ENT_STAGE2_INDEX(k)
#define ENT_ENC_TO_UNI_STAGE2(k)
enum entity_charset charset
PHPAPI ZEND_COLD void php_error_docref(const char *docref, int type, const char *format,...)
const mbfl_encoding * internal_encoding
unsigned const char * pos
unsigned char key[REFLECTION_KEY_LEN]
#define ENT_HTML_QUOTE_SINGLE
#define ENT_HTML_DOC_TYPE_MASK
#define ENT_HTML_IGNORE_ERRORS
#define ENT_HTML_SUBSTITUTE_DISALLOWED_CHARS
#define ENT_HTML_DOC_XHTML
#define PHP_HTML_SPECIALCHARS
#define ENT_HTML_SUBSTITUTE_ERRORS
#define ENT_HTML_DOC_HTML401
#define ENT_HTML_DOC_HTML5
#define ENT_HTML_DOC_XML1
#define ENT_HTML_QUOTE_DOUBLE
union entity_stage3_row::@214357234100363062352162052152361021062044212245 data
unsigned short entity_len
struct entity_stage3_row::@214357234100363062352162052152361021062044212245::@364213017142012276145045127161070207325335136340 ent
const entity_multicodepoint_row * multicodepoint_table
const entity_stage3_row * table
const entity_stage1_row * ms_table
struct entity_multicodepoint_row::@366170117123122361032232327262075152115160367061 leading_entry
const char * default_entity
unsigned short default_entity_len
unsigned short entity_len
struct entity_multicodepoint_row::@041214021362036204215357310010366240214016323110 normal_entry
#define INTERNAL_FUNCTION_PARAMETERS
#define INTERNAL_FUNCTION_PARAM_PASSTHRU
ZEND_API void add_assoc_stringl_ex(zval *arg, const char *key, size_t key_len, const char *str, size_t length)
#define ZEND_PARSE_PARAMETERS_END()
#define Z_PARAM_STR_OR_NULL(dest)
#define Z_PARAM_STRING(dest, dest_len)
#define Z_PARAM_STR(dest)
#define ZEND_PARSE_PARAMETERS_START(min_num_args, max_num_args)
#define Z_PARAM_LONG(dest)
#define Z_PARAM_BOOL(dest)
#define ZEND_STRTOL(s0, s1, base)
struct _zend_string zend_string
ZEND_API int ZEND_FASTCALL zend_binary_strcasecmp(const char *s1, size_t len1, const char *s2, size_t len2)
#define ZSTR_EMPTY_ALLOC()
ZEND_RESULT_CODE zend_result