/*
 * Forward declaration of the HTML-entity -> wide-character converter that is
 * defined later in this file.
 *
 * in / in_len   - pointer to the input cursor and to the number of input
 *                 bytes; the definition reads bytes from *in up to
 *                 *in + *in_len.
 * buf / bufsize - output array of uint32_t and its capacity in elements; the
 *                 definition stops once buf + bufsize is reached.
 * state         - conversion state word (its use is not visible in the
 *                 surviving lines of the definition - confirm there).
 *
 * NOTE(review): the return value is presumably the number of elements written
 * to buf; confirm against the definition.
 */
static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
/*
 * Per-code-point class table for values 0x00-0xFF, indexed by code point.
 *
 *   0 - 0x00-0x7F other than the three characters below
 *   2 - '&' (0x26), '<' (0x3C) and '>' (0x3E)
 *   1 - 0x80-0xFF
 *
 * The encoders in this file test `htmlentitifieds[c] != 1` after checking
 * that c is below the table size, so classes 0 and 2 take the same branch
 * and only class 1 is treated differently.
 * NOTE(review): presumably that branch emits the character unchanged and
 * class 1 is written as an entity - confirm against the encoder bodies.
 */
static const int htmlentitifieds[256] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xA0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
/*
 * NULL-terminated list of alternative names for this encoding. The array is
 * referenced from an initializer elsewhere in this file.
 */
static const char *mbfl_encoding_html_ent_aliases[] = {
	"HTML",
	"html",
	NULL
};
63 mbfl_encoding_html_ent_aliases,
/*
 * Evaluate `statement` exactly once; if the result is negative, return -1
 * from the enclosing function, otherwise fall through. Wrapped in
 * do { } while (0) so that it behaves as a single statement after if/else.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
107 if (c <
sizeof(htmlentitifieds) /
sizeof(htmlentitifieds[0]) &&
108 htmlentitifieds[c] != 1) {
116 for (
p = e->
name; *
p !=
'\0';
p++) {
124 int *
p = tmp +
sizeof(tmp) /
sizeof(tmp[0]);
128 uc = (
unsigned int)c;
132 *(--
p) =
"0123456789"[uc % 10];
136 for (; *
p !=
'\0';
p++) {
/*
 * NOTE(review): presumably the capacity, in bytes, of the buffer in which the
 * decoder collects a pending entity - the use sites are not visible here, so
 * confirm before relying on this.
 */
#define html_enc_buffer_size 16
/*
 * The 63 characters '#', '0'-'9', 'a'-'z' and 'A'-'Z' as a NUL-terminated
 * string. This is the same character set that is_html_entity_char() in this
 * file accepts.
 */
static const char html_entity_chars[] = "#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
183 unsigned int ent = 0;
202 if (
v >=
'0' &&
v <=
'9') {
204 }
else if (
v >=
'A' &&
v <=
'F') {
206 }
else if (
v >=
'a' &&
v <=
'f') {
221 if (ent > 0x19999999) {
226 if (
v >=
'0' &&
v <=
'9') {
238 if (ent < 0x110000) {
251 while (entity->
name) {
270 while (filter->
status--) {
290 while (filter->
status--) {
/*
 * Return true if `c` is an ASCII digit, an ASCII letter of either case, or
 * '#'; false for every other byte value (including ';' and '&').
 *
 * The converter below uses this to scan forward from the byte after '&' to
 * the first byte that cannot belong to an entity, then checks whether that
 * byte is ';'.
 */
static bool is_html_entity_char(unsigned char c)
{
	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '#';
}
343static size_t mb_htmlent_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
345 unsigned char *
p = *in, *e =
p + *in_len;
346 uint32_t *
out =
buf, *limit =
buf + bufsize;
348 while (
p < e &&
out < limit) {
349 unsigned char c = *
p++;
353 unsigned char *terminator =
p;
354 while (terminator < e && is_html_entity_char(*terminator))
356 if (terminator < e && *terminator ==
';') {
357 if (*
p ==
'#' && (e -
p) >= 2) {
359 unsigned int value = 0;
360 unsigned char *digits =
p + 1;
361 if (*digits ==
'x' || *digits ==
'X') {
364 if (digits == terminator) {
367 while (digits < terminator) {
368 unsigned char digit = *digits++;
369 if (digit >=
'0' && digit <=
'9') {
371 }
else if (digit >=
'A' && digit <=
'F') {
373 }
else if (digit >=
'a' && digit <=
'f') {
381 if (digits == terminator) {
384 while (digits < terminator) {
385 unsigned char digit = *digits++;
386 if (digit >=
'0' && digit <=
'9') {
393 if (
value > 0x10FFFF) {
399 }
else if (terminator >
p && terminator < e) {
402 while (entity->
name) {
415 while (
p < terminator &&
out < limit) {
418 if (terminator < e && *terminator ==
';' &&
out < limit) {
435 unsigned char *
out, *limit;
442 if (w <
sizeof(htmlentitifieds) /
sizeof(htmlentitifieds[0]) && htmlentitifieds[w] != 1) {
444 out = mb_convert_buf_add(
out, w);
446 out = mb_convert_buf_add(
out,
'&');
450 while (entity->
name) {
451 if (w == entity->
code) {
453 for (
char *str = entity->
name; *str; str++) {
454 out = mb_convert_buf_add(
out, *str);
456 out = mb_convert_buf_add(
out,
';');
464 out = mb_convert_buf_add(
out,
'#');
467 out = mb_convert_buf_add(
out,
'0');
469 unsigned char buf[12];
470 unsigned char *converted =
buf +
sizeof(
buf);
472 *(--converted) =
"0123456789"[w % 10];
475 while (converted <
buf +
sizeof(
buf)) {
476 out = mb_convert_buf_add(
out, *converted++);
480 out = mb_convert_buf_add(
out,
';');
strchr(string $haystack, string $needle, bool $before_needle=false)
zend_ffi_ctype_name_buf buf
const mbfl_html_entity_entry mbfl_html_entity_list[]
struct _mbfl_html_entity_entry mbfl_html_entity_entry
const mbfl_encoding mbfl_encoding_html_ent
const struct mbfl_convert_vtbl vtbl_wchar_html
const struct mbfl_convert_vtbl vtbl_html_wchar
void mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter)
int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
#define html_enc_buffer_size
int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter)
int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter)
int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter)
void mbfl_filt_conv_html_dec_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest)
void mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter)
#define MBFL_ENCTYPE_GL_UNSAFE
void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
struct _mbfl_convert_filter mbfl_convert_filter
@ mbfl_no_encoding_html_ent
#define MB_CONVERT_BUF_STORE(buf, _out, _limit)
#define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed)
#define MB_CONVERT_BUF_LOAD(buf, _out, _limit)
unsigned const char * end
unsigned const char * pos
output_function_t output_function
flush_function_t flush_function
strncmp(string $string1, string $string2, int $length)
strcmp(string $string1, string $string2)