22#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
35#define DOM_FALLBACK_ENCODING_ID LXB_ENCODING_UTF_8
37typedef struct dom_line_column_cache {
41} dom_line_column_cache;
43typedef struct dom_lexbor_libxml2_bridge_application_data {
44 const char *input_name;
46 const char *current_input_characters;
47 size_t current_input_length;
48 size_t current_total_offset;
49 dom_line_column_cache cache_tokenizer;
51} dom_lexbor_libxml2_bridge_application_data;
53typedef struct dom_character_encoding_data {
56} dom_character_encoding_data;
58typedef zend_result (*dom_write_output)(
void*,
const char *, size_t);
60typedef struct dom_output_ctx {
63 lxb_encoding_encode_t *
encode;
64 lxb_encoding_decode_t *decode;
68 dom_write_output write_output;
71typedef struct dom_decoding_encoding_ctx {
75 lxb_encoding_encode_t
encode;
76 lxb_encoding_decode_t decode;
81} dom_decoding_encoding_ctx;
86 const uint32_t PROP_INDEX = 0;
105static void dom_decoding_encoding_ctx_init(dom_decoding_encoding_ctx *ctx)
108 ctx->fast_path =
true;
112 ctx->encoding_output,
113 sizeof(ctx->encoding_output) /
sizeof(*ctx->encoding_output)
120 sizeof(ctx->codepoints) /
sizeof(*ctx->codepoints)
177 default:
return "unknown error";
220 default:
return "unknown error";
231 default:
return "unknown error";
235static void dom_reset_line_column_cache(dom_line_column_cache *cache)
237 cache->last_line = 1;
238 cache->last_column = 1;
239 cache->last_offset = 0;
242static void dom_find_line_and_column_using_cache(
243 const dom_lexbor_libxml2_bridge_application_data *application_data,
244 dom_line_column_cache *cache,
248 offset -= application_data->current_total_offset;
249 if (
offset > application_data->current_input_length) {
251 offset = application_data->current_input_length;
254 size_t last_column = cache->last_column;
256 size_t last_offset = cache->last_offset;
259 if (application_data->current_input_codepoints !=
NULL) {
260 while (last_offset <
offset) {
261 if (application_data->current_input_codepoints[last_offset] == 0x000A ) {
270 while (last_offset <
offset) {
271 const lxb_char_t current = application_data->current_input_characters[last_offset];
280 if ((
current & 0b11000000) != 0b10000000) {
288 cache->last_column = last_column;
290 cache->last_offset = last_offset;
293static void dom_lexbor_libxml2_bridge_tokenizer_error_reporter(
294 void *application_data_voidptr,
299 dom_lexbor_libxml2_bridge_application_data *application_data = application_data_voidptr;
300 dom_find_line_and_column_using_cache(application_data, &application_data->cache_tokenizer,
offset);
301 php_libxml_pretend_ctx_error_ex(application_data->input_name, application_data->cache_tokenizer.last_line, application_data->cache_tokenizer.last_column,
"tokenizer error %s in %s, line: %zu, column: %zu\n", dom_lexbor_tokenizer_error_code_to_string(
error->id), application_data->input_name, application_data->cache_tokenizer.last_line, application_data->cache_tokenizer.last_column);
304static void dom_lexbor_libxml2_bridge_tree_error_reporter(
305 void *application_data_voidptr,
312 dom_lexbor_libxml2_bridge_application_data *application_data = application_data_voidptr;
321 php_libxml_pretend_ctx_error_ex(
322 application_data->input_name,
325 "tree error %s in %s, line: %zu, column: %zu\n",
326 dom_lexbor_tree_error_code_to_string(
error->id),
327 application_data->input_name,
332 php_libxml_pretend_ctx_error_ex(
333 application_data->input_name,
336 "tree error %s in %s, line: %zu, column: %zu-%zu\n",
337 dom_lexbor_tree_error_code_to_string(
error->id),
338 application_data->input_name,
346static xmlNodePtr dom_search_child(xmlNodePtr parent,
const char *searching_for)
348 xmlNodePtr node = parent->children;
349 while (node !=
NULL) {
358static void dom_place_remove_element_and_hoist_children(xmlNodePtr parent,
const char *searching_for)
360 xmlNodePtr node = dom_search_child(parent, searching_for);
364 xmlNodePtr child = node->children;
365 while (child !=
NULL) {
366 xmlUnlinkNode(child);
367 xmlAddChild(parent, child);
368 child = node->children;
375static void dom_post_process_html5_loading(
381 if (
options & HTML_PARSE_NOIMPLIED) {
382 xmlNodePtr html_node = dom_search_child((xmlNodePtr) lxml_doc,
"html");
384 dom_place_remove_element_and_hoist_children(html_node,
"head");
387 dom_place_remove_element_and_hoist_children(html_node,
"body");
390 dom_place_remove_element_and_hoist_children((xmlNodePtr) lxml_doc,
"html");
396static dom_character_encoding_data dom_determine_encoding(
const char *source,
size_t source_len)
398 dom_character_encoding_data
result;
401 if (source_len >= 3 && source[0] ==
'\xEF' && source[1] ==
'\xBB' && source[2] ==
'\xBF') {
405 }
else if (source_len >= 2) {
406 if (source[0] ==
'\xFE' && source[1] ==
'\xFF') {
410 }
else if (source[0] ==
'\xFF' && source[1] ==
'\xFE') {
421 goto fallback_uninit;
424 if (source_len > 1024) {
436 if (!
result.encoding_data) {
451static void dom_setup_parser_encoding_manually(
const lxb_char_t *buf_start,
const lxb_encoding_data_t *encoding_data, dom_decoding_encoding_ctx *decoding_encoding_ctx, dom_lexbor_libxml2_bridge_application_data *application_data)
455 decoding_encoding_ctx->decode_data = encoding_data;
458 &decoding_encoding_ctx->decode,
459 decoding_encoding_ctx->decode_data,
460 decoding_encoding_ctx->codepoints,
461 sizeof(decoding_encoding_ctx->codepoints) /
sizeof(*decoding_encoding_ctx->codepoints)
464 &decoding_encoding_ctx->decode,
465 &replacement_codepoint,
469 decoding_encoding_ctx->fast_path = decoding_encoding_ctx->decode_data == decoding_encoding_ctx->encode_data;
471 if (decoding_encoding_ctx->fast_path) {
472 application_data->current_input_codepoints =
NULL;
473 application_data->current_input_characters = (
const char *) buf_start;
475 application_data->current_input_codepoints = decoding_encoding_ctx->codepoints;
476 application_data->current_input_characters =
NULL;
480static void dom_setup_parser_encoding_implicitly(
483 dom_decoding_encoding_ctx *decoding_encoding_ctx,
484 dom_lexbor_libxml2_bridge_application_data *application_data
487 const char *buf_start = (
const char *) *buf_ref;
488 dom_character_encoding_data dom_encoding_data = dom_determine_encoding(buf_start, *read);
489 *buf_ref += dom_encoding_data.bom_shift;
490 *read -= dom_encoding_data.bom_shift;
491 dom_setup_parser_encoding_manually((
const lxb_char_t *) buf_start, dom_encoding_data.encoding_data, decoding_encoding_ctx, application_data);
494static bool dom_process_parse_chunk(
498 size_t encoded_length,
500 size_t input_buffer_length,
501 size_t *tokenizer_error_offset,
502 size_t *tree_error_offset
505 dom_lexbor_libxml2_bridge_application_data *application_data = ctx->
application_data;
506 application_data->current_input_length = input_buffer_length;
513 dom_find_line_and_column_using_cache(application_data, &application_data->cache_tokenizer, application_data->current_total_offset + input_buffer_length);
515 application_data->current_total_offset += input_buffer_length;
516 application_data->cache_tokenizer.last_offset = 0;
520static bool dom_decode_encode_fast_path(
526 dom_decoding_encoding_ctx *decoding_encoding_ctx,
527 size_t *tokenizer_error_offset,
528 size_t *tree_error_offset
543 if (!dom_process_parse_chunk(
549 buf_ref - *buf_ref_ref,
550 tokenizer_error_offset,
558 while (buf_ref != buf_end) {
560 if (decoding_encoding_ctx->decode.u.utf_8.need == 0 && *buf_ref < 0x80) {
571 size_t skip = buf_ref - buf_ref_backup;
572 if (!dom_process_parse_chunk(
576 buf_ref - last_output - skip,
578 buf_ref - last_output,
579 tokenizer_error_offset,
590 *buf_ref_ref = buf_ref;
595 if (!dom_process_parse_chunk(
602 tokenizer_error_offset,
608 last_output = buf_ref;
611 if (buf_ref != last_output
612 && !dom_process_parse_chunk(
616 buf_ref - last_output,
618 buf_ref - last_output,
619 tokenizer_error_offset,
624 *buf_ref_ref = buf_ref;
627 *buf_ref_ref = buf_ref;
631static bool dom_decode_encode_slow_path(
637 dom_decoding_encoding_ctx *decoding_encoding_ctx,
638 size_t *tokenizer_error_offset,
639 size_t *tree_error_offset
645 decode_status = decoding_encoding_ctx->decode_data->decode(&decoding_encoding_ctx->decode, &buf_ref, buf_end);
649 const lxb_codepoint_t *codepoints_end = decoding_encoding_ctx->codepoints + decoding_buffer_used;
651 encode_status = decoding_encoding_ctx->encode_data->encode(&decoding_encoding_ctx->encode, &codepoints_ref, codepoints_end);
653 if (!dom_process_parse_chunk(
658 decoding_encoding_ctx->encoding_output,
659 decoding_buffer_used,
660 tokenizer_error_offset,
669 *buf_ref_ref = buf_ref;
672 *buf_ref_ref = buf_ref;
676static bool dom_parse_decode_encode_step(
682 dom_decoding_encoding_ctx *decoding_encoding_ctx,
683 size_t *tokenizer_error_offset,
684 size_t *tree_error_offset
687 if (decoding_encoding_ctx->fast_path) {
688 return dom_decode_encode_fast_path(
694 decoding_encoding_ctx,
695 tokenizer_error_offset,
699 return dom_decode_encode_slow_path(
705 decoding_encoding_ctx,
706 tokenizer_error_offset,
712static bool dom_parse_decode_encode_finish(
716 dom_decoding_encoding_ctx *decoding_encoding_ctx,
717 size_t *tokenizer_error_offset,
718 size_t *tree_error_offset
727 if (decoding_buffer_size > 0) {
729 const lxb_codepoint_t *codepoints_end = codepoints_ref + decoding_buffer_size;
730 status = decoding_encoding_ctx->encode_data->encode(&decoding_encoding_ctx->encode, &codepoints_ref, codepoints_end);
738 && !dom_process_parse_chunk(
743 decoding_encoding_ctx->encoding_output,
745 tokenizer_error_offset,
756 if ((
options & ~VALID_OPTIONS) != 0) {
760 "LIBXML_HTML_NOIMPLIED, "
761 "Dom\\HTML_NO_DEFAULT_NS)");
770 size_t encoding_len =
strlen(
"UTF-8");
777 if (encoding_data ==
NULL) {
787 lxml_doc->encoding = xmlStrdup((
const xmlChar *)
encoding);
792 (xmlNodePtr) lxml_doc,
807 if (
options & XML_PARSE_NOERROR) {
816 const char *source, *override_encoding =
NULL;
817 size_t source_len, override_encoding_len;
826 &override_encoding_len
831 if (!check_options_validity(2,
options)) {
835 dom_lexbor_libxml2_bridge_application_data application_data;
836 application_data.input_name =
"Entity";
837 application_data.current_total_offset = 0;
838 application_data.html_no_implied =
options & HTML_PARSE_NOIMPLIED;
839 dom_reset_line_column_cache(&application_data.cache_tokenizer);
842 if (dom_should_register_error_handlers(
options)) {
845 dom_lexbor_libxml2_bridge_tokenizer_error_reporter,
846 dom_lexbor_libxml2_bridge_tree_error_reporter
851 size_t tokenizer_error_offset = 0;
852 size_t tree_error_offset = 0;
856 dom_decoding_encoding_ctx decoding_encoding_ctx;
857 dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
858 if (override_encoding !=
NULL) {
861 override_encoding_len
863 if (!encoding_data) {
867 dom_setup_parser_encoding_manually(buf_ref, encoding_data, &decoding_encoding_ctx, &application_data);
869 dom_setup_parser_encoding_implicitly(&buf_ref, &source_len, &decoding_encoding_ctx, &application_data);
884 while (source_len > 0) {
885 size_t chunk_size = source_len;
886 const size_t MAX_CHUNK_SIZE =
sizeof(decoding_encoding_ctx.encoding_output) /
sizeof(*decoding_encoding_ctx.encoding_output);
887 if (chunk_size > MAX_CHUNK_SIZE) {
888 chunk_size = MAX_CHUNK_SIZE;
890 source_len -= chunk_size;
892 const lxb_char_t *buf_end = buf_ref + chunk_size;
893 bool result = dom_parse_decode_encode_step(
899 &decoding_encoding_ctx,
900 &tokenizer_error_offset,
910 if (application_data.current_input_characters) {
911 application_data.current_input_characters += chunk_size;
915 if (!dom_parse_decode_encode_finish(&ctx, document, parser, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset)) {
937 php_libxml_ctx_error(
940 dom_lexbor_libxml2_bridge_status_code_to_string(bridge_status),
941 application_data.input_name
950 if (decoding_encoding_ctx.decode_data) {
951 lxml_doc->encoding = xmlStrdup((
const xmlChar *) decoding_encoding_ctx.decode_data->name);
953 lxml_doc->encoding = xmlStrdup((
const xmlChar *)
"UTF-8");
959 (xmlNodePtr) lxml_doc,
975 const char *filename, *override_encoding =
NULL;
987 &override_encoding_len
993 if (
strstr(filename,
"%00")) {
998 if (!check_options_validity(2,
options)) {
1002 dom_lexbor_libxml2_bridge_application_data application_data;
1003 application_data.input_name = filename;
1004 application_data.current_total_offset = 0;
1005 application_data.html_no_implied =
options & HTML_PARSE_NOIMPLIED;
1006 dom_reset_line_column_cache(&application_data.cache_tokenizer);
1009 if (dom_should_register_error_handlers(
options)) {
1012 dom_lexbor_libxml2_bridge_tokenizer_error_reporter,
1013 dom_lexbor_libxml2_bridge_tree_error_reporter
1021 dom_decoding_encoding_ctx decoding_encoding_ctx;
1022 dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
1023 bool should_determine_encoding_implicitly =
true;
1024 if (override_encoding !=
NULL) {
1027 override_encoding_len
1029 if (!encoding_data) {
1033 should_determine_encoding_implicitly =
false;
1034 dom_setup_parser_encoding_manually((
const lxb_char_t *)
buf, encoding_data, &decoding_encoding_ctx, &application_data);
1047 if (should_determine_encoding_implicitly) {
1054 if (encoding_data !=
NULL) {
1055 should_determine_encoding_implicitly =
false;
1056 dom_setup_parser_encoding_manually(
1059 &decoding_encoding_ctx,
1077 size_t tokenizer_error_offset = 0;
1078 size_t tree_error_offset = 0;
1085 if (should_determine_encoding_implicitly) {
1086 should_determine_encoding_implicitly =
false;
1087 dom_setup_parser_encoding_implicitly(&buf_ref, (
size_t *) &read, &decoding_encoding_ctx, &application_data);
1091 bool result = dom_parse_decode_encode_step(
1097 &decoding_encoding_ctx,
1098 &tokenizer_error_offset,
1106 if (!dom_parse_decode_encode_finish(&ctx, document, parser, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset)) {
1127 php_libxml_ctx_error(
NULL,
"%s in %s", dom_lexbor_libxml2_bridge_status_code_to_string(bridge_status), filename);
1135 if (decoding_encoding_ctx.decode_data) {
1136 lxml_doc->encoding = xmlStrdup((
const xmlChar *) decoding_encoding_ctx.decode_data->name);
1138 lxml_doc->encoding = xmlStrdup((
const xmlChar *)
"UTF-8");
1142 xmlChar *converted = xmlPathToURI((
const xmlChar *)
ZSTR_VAL(opened_path));
1147 if (
strncmp((
const char *) converted,
"file:/",
sizeof(
"file:/") - 1) != 0) {
1148 xmlChar *
buffer = xmlStrdup((
const xmlChar *)
"file://");
1153 xmlChar *new_buffer = xmlStrcat(
buffer, converted);
1160 lxml_doc->URL = new_buffer;
1165 lxml_doc->URL = converted;
1168 lxml_doc->URL = xmlStrdup((
const xmlChar *) filename);
1171 if (opened_path !=
NULL) {
1180 (xmlNodePtr) lxml_doc,
1191 if (private_data !=
NULL) {
1196 if (opened_path !=
NULL) {
1201static zend_result dom_write_output_smart_str(
void *ctx,
const char *
buf,
size_t size)
1207static zend_result dom_write_output_stream(
void *application_data,
const char *
buf,
size_t len)
1216static zend_result dom_saveHTML_write_string_len(
void *application_data,
const char *
buf,
size_t len)
1218 dom_output_ctx *output = (dom_output_ctx *) application_data;
1224 decode_status = output->decoding_data->decode(output->decode, &buf_ref, buf_end);
1229 encode_status = output->encoding_data->encode(output->encode, &codepoints_ref, codepoints_end);
1231 output->output_data,
1232 (
const char *) output->encoding_output,
1245static zend_result dom_saveHTML_write_string(
void *application_data,
const char *
buf)
1247 return dom_saveHTML_write_string_len(application_data,
buf,
strlen(
buf));
1250static zend_result dom_common_save(dom_output_ctx *output_ctx,
dom_object *intern,
const xmlDoc *docp,
const xmlNode *node)
1256 strlen((
const char *) docp->encoding)
1272 output_ctx->encoding_data = encoding_data;
1273 output_ctx->decoding_data = decoding_data;
1274 output_ctx->encode = &
encode;
1275 output_ctx->decode = &decode;
1276 output_ctx->codepoints = codepoints;
1277 output_ctx->encoding_output = encoding_output;
1293 output_ctx->output_data,
1294 (
const char *) encoding_output,
1303 output_ctx->output_data,
1304 (
const char *) encoding_output,
1327 if (file_len == 0) {
1339 dom_output_ctx output_ctx;
1340 output_ctx.output_data = stream;
1341 output_ctx.write_output = dom_write_output_stream;
1342 if (
UNEXPECTED(dom_common_save(&output_ctx, intern, docp, (
const xmlNode *) docp) !=
SUCCESS)) {
1357 const xmlNode *node;
1366 if (nodep !=
NULL) {
1368 if (node->doc != docp) {
1373 node = (
const xmlNode *) docp;
1377 dom_output_ctx output_ctx;
1378 output_ctx.output_data = &
buf;
1379 output_ctx.write_output = dom_write_output_smart_str;
1397 if (encoding_data !=
NULL) {
1398 xmlFree(BAD_CAST docp->encoding);
1399 docp->encoding = xmlStrdup((
const xmlChar *) encoding_data->
name);
1408static xmlNodePtr dom_html_document_element_read_raw(
const xmlDoc *docp,
bool (*accept)(
const xmlChar *))
1410 const xmlNode *root = xmlDocGetRootElement(docp);
1415 xmlNodePtr cur = root->children;
1416 while (cur !=
NULL) {
1430 const xmlNode *element = dom_html_document_element_read_raw(docp, accept);
1436static bool dom_accept_body_name(
const xmlChar *
name)
1438 return xmlStrEqual(
name, BAD_CAST
"body") || xmlStrEqual(
name, BAD_CAST
"frameset");
1441static bool dom_accept_head_name(
const xmlChar *
name)
1443 return xmlStrEqual(
name, BAD_CAST
"head");
1449 return dom_html_document_element_read_helper(obj,
retval, dom_accept_body_name);
1455 return dom_html_document_element_read_helper(obj,
retval, dom_accept_head_name);
1466 if (newval_intern->
ptr !=
NULL) {
1467 xmlNodePtr newval_node = ((php_libxml_node_ptr *) newval_intern->
ptr)->node;
1469 if (dom_accept_body_name(newval_node->name)) {
1471 const xmlNode *current_body_element = dom_html_document_element_read_raw(docp, dom_accept_body_name);
1472 if (current_body_element == newval_node) {
1477 if (current_body_element !=
NULL) {
1479 xmlNodePtr old = xmlReplaceNode((xmlNodePtr) current_body_element, newval_node);
1480 if (old !=
NULL && old->_private ==
NULL) {
1481 php_libxml_node_free_resource(old);
1487 xmlNodePtr root = xmlDocGetRootElement(docp);
1495 xmlAddChild(root, newval_node);
1506static zend_string *dom_get_child_text_content(
const xmlNode *node)
1510 const xmlNode *
text = node->children;
1513 smart_str_appends(&content, (
const char *)
text->content);
1518 return smart_str_extract(&content);
1522static xmlNodePtr dom_get_title_element(
const xmlDoc *doc)
1524 xmlNodePtr node = doc->children;
1526 while (node !=
NULL) {
1533 node = php_dom_next_in_tree_order(node,
NULL);
1541static xmlNodePtr dom_get_svg_title_element(xmlNodePtr svg)
1543 xmlNodePtr cur = svg->children;
1545 while (cur !=
NULL) {
1560 xmlNodePtr root = xmlDocGetRootElement(docp);
1572 const xmlNode *title = dom_get_svg_title_element(root);
1573 if (title !=
NULL) {
1574 value = dom_get_child_text_content(title);
1579 const xmlNode *title = dom_get_title_element(docp);
1580 if (title !=
NULL) {
1581 value = dom_get_child_text_content(title);
1594static void dom_string_replace_all(xmlDocPtr docp, xmlNodePtr element,
zval *
zv)
1598 xmlAddChild(element,
text);
1605 xmlNodePtr root = xmlDocGetRootElement(docp);
1614 xmlNodePtr element = dom_get_svg_title_element(root);
1617 if (element ==
NULL) {
1622 xmlNsPtr ns = root->ns;
1623 if (ns->prefix !=
NULL) {
1631 element = xmlNewDocNode(docp, ns, BAD_CAST
"title",
NULL);
1638 if (root->children ==
NULL) {
1639 root->last = element;
1641 element->next = root->children;
1642 root->children->prev = element;
1644 root->children = element;
1645 element->parent = root;
1649 dom_string_replace_all(docp, element, newval);
1654 xmlNodePtr title = dom_get_title_element(docp);
1655 xmlNodePtr
head = dom_html_document_element_read_raw(docp, dom_accept_head_name);
1661 xmlNodePtr element = title;
1664 if (element ==
NULL) {
1675 xmlAddChild(
head, element);
1679 dom_string_replace_all(docp, element, newval);
1686PHP_METHOD(Dom_HTMLDocument, debugGetTemplateCount)
file(string $filename, int $flags=0, $context=null)
strstr(string $haystack, string $needle, bool $before_needle=false)
@ LXB_STATUS_SMALL_BUFFER
lxb_codepoint_t lxb_encoding_decode_utf_8_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
const lxb_encoding_data_t * lxb_encoding_data_by_pre_name(const lxb_char_t *name, size_t length)
lxb_status_t lxb_html_encoding_init(lxb_html_encoding_t *em)
lxb_html_encoding_t * lxb_html_encoding_destroy(lxb_html_encoding_t *em, bool self_destroy)
lxb_status_t lxb_html_encoding_determine(lxb_html_encoding_t *em, const lxb_char_t *data, const lxb_char_t *end)
PHP_DOM_EXPORT zend_class_entry * dom_modern_node_class_entry
PHP_DOM_EXPORT zend_class_entry * dom_html_document_class_entry
PHP_DOM_EXPORT zend_class_entry * dom_abstract_base_document_class_entry
zend_result dom_html_document_encoding_write(dom_object *obj, zval *retval)
zend_result dom_modern_document_implementation_read(dom_object *obj, zval *retval)
zend_result dom_html_document_body_write(dom_object *obj, zval *newval)
zend_result dom_html_document_title_write(dom_object *obj, zval *newval)
zend_result dom_html_document_head_read(dom_object *obj, zval *retval)
zend_result dom_html_document_body_read(dom_object *obj, zval *retval)
zend_result dom_html_document_title_read(dom_object *obj, zval *retval)
#define DOM_PROP_NODE(type, name, obj)
void php_dom_throw_error_with_message(dom_exception_code error_code, const char *error_message, bool strict_error)
void php_dom_throw_error(dom_exception_code error_code, bool strict_error)
int8_t lxb_encoding_encode_utf_8_single(lxb_encoding_encode_t *ctx, lxb_char_t **data, const lxb_char_t *end, lxb_codepoint_t cp)
@ LXB_ENCODING_DECODE_CONTINUE
#define LXB_ENCODING_REPLACEMENT_BYTES
@ LXB_ENCODING_REPLACEMENT_SIZE
@ LXB_ENCODING_MAX_CODEPOINT
@ LXB_ENCODING_REPLACEMENT_CODEPOINT
#define LXB_ENCODING_REPLACEMENT_BUFFER
struct lxb_encoding_data lxb_encoding_data_t
#define LXB_ENCODING_REPLACEMENT_BUFFER_LEN
lxb_inline lxb_status_t lxb_encoding_encode_finish(lxb_encoding_encode_t *encode)
lxb_inline lxb_status_t lxb_encoding_encode_init(lxb_encoding_encode_t *encode, const lxb_encoding_data_t *encoding_data, lxb_char_t *buffer_out, size_t buffer_length)
lxb_inline const lxb_encoding_data_t * lxb_encoding_data_by_name(const lxb_char_t *name, size_t length)
lxb_inline const lxb_encoding_data_t * lxb_encoding_data(lxb_encoding_t encoding)
lxb_inline lxb_status_t lxb_encoding_decode_replace_set(lxb_encoding_decode_t *decode, const lxb_codepoint_t *replace, size_t length)
lxb_inline lxb_status_t lxb_encoding_decode_init(lxb_encoding_decode_t *decode, const lxb_encoding_data_t *encoding_data, lxb_codepoint_t *buffer_out, size_t buffer_length)
lxb_inline void lxb_encoding_decode_buf_used_set(lxb_encoding_decode_t *decode, size_t buffer_used)
lxb_inline size_t lxb_encoding_decode_buf_used(lxb_encoding_decode_t *decode)
lxb_inline lxb_status_t lxb_encoding_encode_replace_set(lxb_encoding_encode_t *encode, const lxb_char_t *replace, size_t length)
lxb_inline lxb_status_t lxb_encoding_decode_finish(lxb_encoding_decode_t *decode)
lxb_inline size_t lxb_encoding_encode_buf_used(lxb_encoding_encode_t *encode)
lxb_inline void lxb_encoding_encode_buf_used_set(lxb_encoding_encode_t *encode, size_t buffer_used)
zend_ffi_ctype_name_buf buf
void lexbor_libxml2_bridge_parse_set_error_callbacks(lexbor_libxml2_bridge_parse_context *ctx, lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter, lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter)
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(lxb_html_document_t *document, xmlDocPtr *doc_out, bool compact_text_nodes, bool create_default_ns, php_dom_private_data *private_data)
lexbor_libxml2_bridge_status
@ LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW
@ LEXBOR_LIBXML2_BRIDGE_STATUS_FATAL_PARSE
@ LEXBOR_LIBXML2_BRIDGE_STATUS_OOM
@ LEXBOR_LIBXML2_BRIDGE_STATUS_OK
@ LEXBOR_LIBXML2_BRIDGE_STATUS_CANNOT_INIT
void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxml2_bridge_extracted_observations *observations)
void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx)
void lexbor_libxml2_bridge_report_errors(const lexbor_libxml2_bridge_parse_context *ctx, lxb_html_parser_t *parser, const lxb_char_t *input_html, size_t chunk_offset, size_t *error_index_offset_tokenizer, size_t *error_index_offset_tree)
zend_result dom_html5_serialize_outer(dom_html5_serialize_context *ctx, const xmlNode *node)
lxb_inline lxb_html_encoding_entry_t * lxb_html_encoding_meta_entry(lxb_html_encoding_t *em, size_t idx)
struct lxb_html_document lxb_html_document_t
lxb_html_tokenizer_error_id_t
@ LXB_HTML_TOKENIZER_ERROR_UNCHINUNATVA
@ LXB_HTML_TOKENIZER_ERROR_SUININST
@ LXB_HTML_TOKENIZER_ERROR_INCHSEAFDONA
@ LXB_HTML_TOKENIZER_ERROR_MIDOPUID
@ LXB_HTML_TOKENIZER_ERROR_MIDOSYID
@ LXB_HTML_TOKENIZER_ERROR_INCLCO
@ LXB_HTML_TOKENIZER_ERROR_ABDOPUID
@ LXB_HTML_TOKENIZER_ERROR_NECO
@ LXB_HTML_TOKENIZER_ERROR_MIATVA
@ LXB_HTML_TOKENIZER_ERROR_UNNACHRE
@ LXB_HTML_TOKENIZER_ERROR_MIWHBEDONA
@ LXB_HTML_TOKENIZER_ERROR_UNEQSIBEATNA
@ LXB_HTML_TOKENIZER_ERROR_SUCHRE
@ LXB_HTML_TOKENIZER_ERROR_ABCLOFEMCO
@ LXB_HTML_TOKENIZER_ERROR_COCHRE
@ LXB_HTML_TOKENIZER_ERROR_MIWHAFDOPUKE
@ LXB_HTML_TOKENIZER_ERROR_MIWHAFDOSYKE
@ LXB_HTML_TOKENIZER_ERROR_MISEAFCHRE
@ LXB_HTML_TOKENIZER_ERROR_UNCHINATNA
@ LXB_HTML_TOKENIZER_ERROR_ENTAWIAT
@ LXB_HTML_TOKENIZER_ERROR_EOINTA
@ LXB_HTML_TOKENIZER_ERROR_ENTAWITRSO
@ LXB_HTML_TOKENIZER_ERROR_UNNUCH
@ LXB_HTML_TOKENIZER_ERROR_UNCHAFDOSYID
@ LXB_HTML_TOKENIZER_ERROR_EOINCO
@ LXB_HTML_TOKENIZER_ERROR_EOINSCHTCOLITE
@ LXB_HTML_TOKENIZER_ERROR_MIDONA
@ LXB_HTML_TOKENIZER_ERROR_DUAT
@ LXB_HTML_TOKENIZER_ERROR_INFICHOFTANA
@ LXB_HTML_TOKENIZER_ERROR_ABOFDIINNUCHRE
@ LXB_HTML_TOKENIZER_ERROR_INOPCO
@ LXB_HTML_TOKENIZER_ERROR_MIENTANA
@ LXB_HTML_TOKENIZER_ERROR_NOININST
@ LXB_HTML_TOKENIZER_ERROR_EOBETANA
@ LXB_HTML_TOKENIZER_ERROR_MIQUBEDOPUID
@ LXB_HTML_TOKENIZER_ERROR_COCHININST
@ LXB_HTML_TOKENIZER_ERROR_CHREOUUNRA
@ LXB_HTML_TOKENIZER_ERROR_MIWHBEAT
@ LXB_HTML_TOKENIZER_ERROR_EOINCD
@ LXB_HTML_TOKENIZER_ERROR_MIQUBEDOSYID
@ LXB_HTML_TOKENIZER_ERROR_CDINHTCO
@ LXB_HTML_TOKENIZER_ERROR_MIWHBEDOPUANSYID
@ LXB_HTML_TOKENIZER_ERROR_UNSOINTA
@ LXB_HTML_TOKENIZER_ERROR_ABDOSYID
@ LXB_HTML_TOKENIZER_ERROR_NOCHRE
@ LXB_HTML_TOKENIZER_ERROR_EOINDO
@ LXB_HTML_TOKENIZER_ERROR_UNQUMAINOFTANA
@ LXB_HTML_TOKENIZER_ERROR_NUCHRE
@ LXB_HTML_TOKENIZER_ERROR_NOVOHTELSTTAWITRSO
@ LXB_HTML_RULES_ERROR_DOTOAFHEMO
@ LXB_HTML_RULES_ERROR_TECLTOWIOPINHEMO
@ LXB_HTML_RULES_ERROR_NUCH
@ LXB_HTML_RULES_ERROR_NOVOHTELSTTAWITRSO
@ LXB_HTML_RULES_ERROR_CHINTATE
@ LXB_HTML_RULES_ERROR_DOTOINFRMO
@ LXB_HTML_RULES_ERROR_UNCLTO
@ LXB_HTML_RULES_ERROR_DOTOINBEHEMO
@ LXB_HTML_RULES_ERROR_UNELINOPELST
@ LXB_HTML_RULES_ERROR_UNTO
@ LXB_HTML_RULES_ERROR_MIELINOPELST
@ LXB_HTML_RULES_ERROR_DOTOAFBOMO
@ LXB_HTML_RULES_ERROR_MIELINSC
@ LXB_HTML_RULES_ERROR_NOBOELINSC
@ LXB_HTML_RULES_ERROR_BADOTOININMO
@ LXB_HTML_RULES_ERROR_TEELISNOCUINHEMO
@ LXB_HTML_RULES_ERROR_UNCHTO
@ LXB_HTML_RULES_ERROR_DOTOAFFRMO
@ LXB_HTML_RULES_ERROR_DOTOINBOMO
@ LXB_HTML_RULES_ERROR_UNTOININMO
@ LXB_HTML_RULES_ERROR_BAENOPELISWR
@ LXB_HTML_RULES_ERROR_DOTOFOCOMO
@ LXB_HTML_RULES_ERROR_DOTOINSEMO
@ LXB_HTML_RULES_ERROR_DOTOINTAMO
@ LXB_HTML_RULES_ERROR_UNCLTOINBEHEMO
@ LXB_HTML_RULES_ERROR_DOTOINHEMO
@ LXB_HTML_RULES_ERROR_UNELINACFOST
@ LXB_HTML_RULES_ERROR_DOTOINBEHTMO
@ LXB_HTML_RULES_ERROR_UNELINSC
@ LXB_HTML_RULES_ERROR_HETOAFHEMO
@ LXB_HTML_RULES_ERROR_HETOINHEMO
@ LXB_HTML_RULES_ERROR_UNCLTOINHEMO
@ LXB_HTML_RULES_ERROR_UNENOFFI
@ LXB_HTML_RULES_ERROR_UNCLTOINBEHTMO
@ LXB_HTML_RULES_ERROR_DOTOINHENOMO
@ LXB_HTML_RULES_ERROR_OPELISWR
enum entity_charset charset
zend_string * dom_strip_and_collapse_ascii_whitespace(zend_string *input)
lxb_html_document_t * lxb_html_document_create(void)
lxb_status_t lxb_html_document_parse_chunk_begin(lxb_html_document_t *document)
lxb_status_t lxb_html_document_parse_chunk(lxb_html_document_t *document, const lxb_char_t *html, size_t size)
lxb_status_t lxb_html_document_parse_chunk_end(lxb_html_document_t *document)
lxb_html_document_t * lxb_html_document_destroy(lxb_html_document_t *document)
PHP_DOM_EXPORT const php_dom_ns_magic_token * php_dom_ns_is_html_magic_token
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_ensure_html_ns(php_dom_libxml_ns_mapper *mapper)
PHP_DOM_EXPORT bool php_dom_ns_is_fast(const xmlNode *nodep, const php_dom_ns_magic_token *magic_token)
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns(php_dom_libxml_ns_mapper *mapper, zend_string *prefix, zend_string *uri)
PHP_DOM_EXPORT const php_dom_ns_magic_token * php_dom_ns_is_svg_magic_token
PHP_DOM_EXPORT php_dom_libxml_ns_mapper * php_dom_get_ns_mapper(dom_object *object)
dom_object * php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent)
void php_dom_create_implementation(zval *retval, bool modern)
xmlDocPtr php_dom_create_html_doc(void)
#define DOM_HTML_NO_DEFAULT_NS
void dom_set_xml_class(php_libxml_ref_obj *document)
bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, xmlDocPtr new_document)
#define DOM_GET_OBJ(__ptr, __id, __prtype, __intern)
bool php_dom_create_nullable_object(xmlNodePtr obj, zval *return_value, dom_object *domobj)
xmlChar * php_dom_libxml_fix_file_path(xmlChar *path)
void dom_remove_all_children(xmlNodePtr nodep)
const XML_CDATA_SECTION_NODE
unsigned const char * text
PHP_JSON_API size_t int options
struct php_pcntl_pending_signal * head
xmlCharEncodingHandlerPtr encoding
PHPAPI php_stream_wrapper php_plain_files_wrapper
struct _php_stream php_stream
#define php_stream_read(stream, buf, count)
#define php_stream_open_wrapper_ex(path, mode, options, opened, context)
#define php_stream_close(stream)
#define php_stream_tell(stream)
#define php_stream_write(stream, buf, count)
void php_dom_private_data_destroy(php_dom_private_data *data)
php_dom_private_data * php_dom_private_data_create(void)
uint32_t php_dom_get_template_count(const php_dom_private_data *private_data)
php_libxml_private_data_header * php_dom_libxml_private_data_header(php_dom_private_data *private_data)
php_libxml_ref_obj * document
php_stream_wrapper * wrapper
zend_result(* write_string_len)(void *application_data, const char *buf, size_t len)
php_dom_private_data * private_data
zend_result(* write_string)(void *application_data, const char *buf)
lexbor_libxml2_bridge_extracted_observations observations
lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter
lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter
lxb_encoding_encode_f encode
lxb_dom_document_t dom_document
unsigned int lxb_status_t
struct _dom_object dom_object
ZEND_API ZEND_COLD void zend_value_error(const char *format,...)
ZEND_API zend_result zend_parse_parameters(uint32_t num_args, const char *type_spec,...)
ZEND_API ZEND_COLD void zend_argument_must_not_be_empty_error(uint32_t arg_num)
ZEND_API ZEND_COLD void zend_argument_value_error(uint32_t arg_num, const char *format,...)
#define ZEND_PARSE_PARAMETERS_NONE()
#define ZVAL_EMPTY_STRING(z)
error_reporting(?int $error_level=null)
strncmp(string $string1, string $string2, int $length)
strcmp(string $string1, string $string2)
zend_string_release_ex(func->internal_function.function_name, 0)
#define OBJ_PROP_TO_NUM(offset)
#define OBJ_PROP_NUM(obj, num)
struct _zend_property_info zend_property_info
ZEND_API ZEND_COLD zend_object * zend_throw_exception_ex(zend_class_entry *exception_ce, zend_long code, const char *format,...)
ZEND_API void(ZEND_FASTCALL *zend_touch_vm_stack_data)(void *vm_stack_data)
struct _zend_string zend_string
ZEND_API zend_property_info * zend_get_property_info(const zend_class_entry *ce, zend_string *member, int silent)
#define ZEND_IGNORE_VALUE(x)
#define UNEXPECTED(condition)
ZEND_API zend_string * zend_empty_string
#define ZSTR_INIT_LITERAL(s, persistent)
#define Z_STRVAL_P(zval_p)
#define Z_ISUNDEF_P(zval_p)
#define ZVAL_OBJ_COPY(z, o)
ZEND_RESULT_CODE zend_result
zend_property_info * prop_info