php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
inner_html_mixin.c
Go to the documentation of this file.
1/*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Niels Dossche <nielsdos@php.net> |
14 +----------------------------------------------------------------------+
15*/
16
17#ifdef HAVE_CONFIG_H
18#include "config.h"
19#endif
20
21#include "php.h"
22#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
23#include "php_dom.h"
24#include "dom_properties.h"
25#include "html5_parser.h"
26#include "html5_serializer.h"
27#include "xml_serializer.h"
28#include "domexception.h"
29#include <libxml/xmlsave.h>
32#include <lexbor/tag/tag.h>
34
35/* Spec date: 2024-04-14 */
36
37static zend_result dom_inner_html_write_string(void *application_data, const char *buf)
38{
39 smart_str *output = application_data;
40 smart_str_appends(output, buf);
41 return SUCCESS;
42}
43
44static zend_result dom_inner_html_write_string_len(void *application_data, const char *buf, size_t len)
45{
46 smart_str *output = application_data;
47 smart_str_appendl(output, buf, len);
48 return SUCCESS;
49}
50
51static int dom_write_smart_str(void *context, const char *buffer, int len)
52{
53 smart_str *str = context;
54 smart_str_appendl(str, buffer, len);
55 return len;
56}
57
58/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
59 * and https://w3c.github.io/DOM-Parsing/#dfn-fragment-serializing-algorithm */
/* Serializes the node behind obj and stores the resulting string in retval.
 * - HTML document: dom_html5_serialize() writes into a smart_str via the two write_string callbacks.
 * - XML document: every child of the node is passed to dom_xml_serialize() with require_well_formed
 *   set to true; the output is collected in a smart_str through dom_write_smart_str().
 * Returns SUCCESS once retval holds the string. In the XML branch a negative status frees the partial
 * output, throws a SYNTAX_ERR DOM exception and returns FAILURE.
 * NOTE(review): this listing jumps from line 59 to line 61, so the signature is not visible here;
 * presumably `zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)` -- confirm.
 * NOTE(review): line 70 is absent as well, so the declaration of `ctx` is not visible
 * (presumably a dom_html5_serialize_context) -- confirm. */
61{
62 DOM_PROP_NODE(xmlNodePtr, node, obj);
63
64 /* 1. Let context document be the value of node's node document. */
65 const xmlDoc *context_document = node->doc;
66
67 /* 2. If context document is an HTML document, return an HTML serialization of node. */
68 if (context_document->type == XML_HTML_DOCUMENT_NODE) {
69 smart_str output = {0};
71 ctx.private_data = php_dom_get_private_data(obj);
72 ctx.application_data = &output;
73 ctx.write_string = dom_inner_html_write_string;
74 ctx.write_string_len = dom_inner_html_write_string_len;
/* The return value of dom_html5_serialize() is not inspected; whatever was written is returned. */
75 dom_html5_serialize(&ctx, node);
76 ZVAL_STR(retval, smart_str_extract(&output));
77 }
78 /* 3. Otherwise, context document is an XML document; return an XML serialization of node passing the flag require well-formed. */
79 else {
80 ZEND_ASSERT(context_document->type == XML_DOCUMENT_NODE);
81
/* Starts out as failure so that an early bail-out (ctxt or out being NULL) takes the error path below. */
82 int status = -1;
83 smart_str str = {0};
84 /* The save context is created with xmlSaveToIO() and writes into str through dom_write_smart_str(); a failed creation is caught by the NULL check below. */
85 xmlSaveCtxtPtr ctxt = xmlSaveToIO(dom_write_smart_str, NULL, &str, "UTF-8", XML_SAVE_AS_XML);
86 if (EXPECTED(ctxt != NULL)) {
87 xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler("UTF-8");
88 xmlOutputBufferPtr out = xmlOutputBufferCreateIO(dom_write_smart_str, NULL, &str, handler);
89 if (EXPECTED(out != NULL)) {
90 php_dom_private_data *private_data = php_dom_get_private_data(obj);
91 /* Note: the innerHTML mixin sets the well-formed flag to true. */
92 xmlNodePtr child = node->children;
93 status = 0;
/* Serialize the children one by one; stop at the first non-zero status. */
94 while (child != NULL && status == 0) {
95 status = dom_xml_serialize(ctxt, out, child, false, true, private_data);
96 child = child->next;
97 }
/* OR-ing keeps an earlier negative status and turns status negative if flush or close return a negative value. */
98 status |= xmlOutputBufferFlush(out);
99 status |= xmlOutputBufferClose(out);
100 }
101 (void) xmlSaveClose(ctxt);
/* NOTE(review): handler was also handed to xmlOutputBufferCreateIO() above; confirm that closing it
 * here is still correct when out was created and already closed. */
102 xmlCharEncCloseFunc(handler);
103 }
104 if (UNEXPECTED(status < 0)) {
105 smart_str_free_ex(&str, false);
106 php_dom_throw_error_with_message(SYNTAX_ERR, "The resulting XML serialization is not well-formed", true);
107 return FAILURE;
108 }
109 ZVAL_STR(retval, smart_str_extract(&str));
110 }
111
112 return SUCCESS;
113}
114
/* Feeds input to Lexbor's fragment parser on `document` via lxb_html_document_parse_fragment_chunk().
 * Input flagged as valid UTF-8 is handed over in a single call. Otherwise the bytes are scanned with a
 * UTF-8 decoder and handed over in pieces, split wherever the decoder reports a code point above
 * LXB_ENCODING_MAX_CODEPOINT. Returns NULL on the failure paths visible below.
 * NOTE(review): listing lines 117, 122-123, 130, 147, 151-152, 162 and 168 are absent from this view.
 * Not visible as a result: the declarations of `status`, `decode` and `encoding_data`, the conditions
 * guarding several `return NULL` statements, what is emitted in place of a rejected sequence, how
 * `element` is used, and the final return value. Confirm against php-src before relying on this. */
115static lxb_dom_node_t *dom_html_fragment_lexbor_parse(lxb_html_document_t *document, lxb_dom_element_t *element, const zend_string *input)
116{
118 if (status != LXB_STATUS_OK) {
119 return NULL;
120 }
121
124 lxb_encoding_decode_init_single(&decode, encoding_data);
125
126 const lxb_char_t *buf_ref = (const lxb_char_t *) ZSTR_VAL(input);
127 if (ZSTR_IS_VALID_UTF8(input)) {
128 /* If we know the input is valid UTF-8, we don't have to perform checks and replace invalid sequences. */
129 status = lxb_html_document_parse_fragment_chunk(document, buf_ref, ZSTR_LEN(input));
131 return NULL;
132 }
133 } else {
134 /* See dom_decode_encode_fast_path(), simplified version for in-memory use-case. */
135 const lxb_char_t *buf_end = buf_ref + ZSTR_LEN(input);
/* Start of the bytes that have been scanned but not yet handed to the parser. */
136 const lxb_char_t *last_output = buf_ref;
137 while (buf_ref < buf_end) {
/* Byte below 0x80 while the decoder's `need` counter is zero: step over it without decoding. */
138 if (decode.u.utf_8.need == 0 && *buf_ref < 0x80) {
139 buf_ref++;
140 continue;
141 }
142
/* Remember where this sequence started; the decoder advances buf_ref. */
143 const lxb_char_t *buf_ref_backup = buf_ref;
144 lxb_codepoint_t codepoint = lxb_encoding_decode_utf_8_single(&decode, &buf_ref, buf_end);
145 if (UNEXPECTED(codepoint > LXB_ENCODING_MAX_CODEPOINT)) {
/* Hand over everything accumulated before the rejected sequence. */
146 status = lxb_html_document_parse_fragment_chunk(document, last_output, buf_ref_backup - last_output);
148 return NULL;
149 }
150
153 return NULL;
154 }
155
/* Accumulation restarts after the rejected sequence. */
156 last_output = buf_ref;
157 }
158 }
159
/* Hand over whatever remains after the last flushed position. */
160 if (buf_ref != last_output) {
161 status = lxb_html_document_parse_fragment_chunk(document, last_output, buf_ref - last_output);
163 return NULL;
164 }
165 }
166 }
167
169}
170
171static lxb_dom_document_cmode_t dom_translate_quirks_mode(php_libxml_quirks_mode quirks_mode)
172{
173 switch (quirks_mode) {
174 case PHP_LIBXML_NO_QUIRKS: return LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS;
175 case PHP_LIBXML_LIMITED_QUIRKS: return LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS;
176 case PHP_LIBXML_QUIRKS: return LXB_DOM_DOCUMENT_CMODE_QUIRKS;
178 }
179}
180
181/* https://html.spec.whatwg.org/#html-fragment-parsing-algorithm */
/* Parses input as an HTML fragment in the context of context_node and returns the result as a libxml2
 * node (NULL if nothing was produced on the paths visible below).
 * A Lexbor context element is set up with the tag id and namespace id looked up from context_node's
 * name and namespace URI, the input is parsed by dom_html_fragment_lexbor_parse(), and the resulting
 * Lexbor nodes are converted with lexbor_libxml2_bridge_convert_fragment() into context_node->doc.
 * NOTE(review): listing lines 187, 189, 211-212, 215 and 218 are absent from this view. Not visible
 * as a result: how `document` and `element` are created, what happens when the bridge status is not
 * OK or when parsing returned NULL, and how `document` is released. Confirm against php-src. */
182static xmlNodePtr dom_html_fragment_parsing_algorithm(dom_object *obj, xmlNodePtr context_node, const zend_string *input, php_libxml_quirks_mode quirks_mode)
183{
184 /* The whole algorithm is implemented in Lexbor, we just have to be the adapter between the
185 * data structures used in PHP and what Lexbor expects. */
186
188 document->dom_document.compat_mode = dom_translate_quirks_mode(quirks_mode);
190
/* Unknown tag names map to LXB_TAG__UNDEF. */
191 const lxb_tag_data_t *tag_data = lxb_tag_data_by_name(document->dom_document.tags, (lxb_char_t *) context_node->name, xmlStrlen(context_node->name));
192 element->node.local_name = tag_data == NULL ? LXB_TAG__UNDEF : tag_data->tag_id;
193
194 const lxb_char_t *ns_uri;
195 size_t ns_uri_len;
/* A context node without a namespace (or without a namespace URI) is looked up with the empty string. */
196 if (context_node->ns == NULL || context_node->ns->href == NULL) {
197 ns_uri = (lxb_char_t *) "";
198 ns_uri_len = 0;
199 } else {
200 ns_uri = context_node->ns->href;
201 ns_uri_len = xmlStrlen(ns_uri);
202 }
/* Unknown namespace URIs map to LXB_NS__UNDEF. */
203 const lxb_ns_data_t *ns_data = lxb_ns_data_by_link(document->dom_document.ns, ns_uri, ns_uri_len);
204 element->node.ns = ns_data == NULL ? LXB_NS__UNDEF : ns_data->ns_id;
205
206 lxb_dom_node_t *node = dom_html_fragment_lexbor_parse(document, element, input);
207 xmlNodePtr fragment = NULL;
208 if (node != NULL) {
209 /* node->last_child could be NULL, but that is allowed. */
210 lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert_fragment(node->last_child, context_node->doc, &fragment, true, true, php_dom_get_private_data(obj));
213 }
214 } else {
216 }
217
219
220 return fragment;
221}
222
223static void dom_xml_parser_tag_name(const xmlNode *context_node, xmlParserCtxtPtr parser)
224{
225 if (context_node->ns != NULL && context_node->ns->prefix != NULL) {
226 xmlParseChunk(parser, (const char *) context_node->ns->prefix, xmlStrlen(context_node->ns->prefix), 0);
227 xmlParseChunk(parser, ":", 1, 0);
228 }
229
230 xmlParseChunk(parser, (const char *) context_node->name, xmlStrlen(context_node->name), 0);
231}
232
233static void dom_xml_fragment_parsing_algorithm_parse(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *context_node, const zend_string *input, xmlParserCtxtPtr parser)
234{
235 xmlParseChunk(parser, "<", 1, 0);
236 dom_xml_parser_tag_name(context_node, parser);
237
238 /* Namespaces: we have to declare all in-scope namespaces including the default namespace */
239 /* xmlns attributes */
240 php_dom_in_scope_ns in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, context_node, true);
241 for (size_t i = 0; i < in_scope_ns.count; i++) {
242 const xmlNs *ns = in_scope_ns.list[i];
243 xmlParseChunk(parser, " xmlns:", 7, 0);
244 ZEND_ASSERT(ns->prefix != NULL);
245 xmlParseChunk(parser, (const char *) ns->prefix, xmlStrlen(ns->prefix), 0);
246 xmlParseChunk(parser, "=\"", 2, 0);
247 xmlParseChunk(parser, (const char *) ns->href, xmlStrlen(ns->href), 0);
248 xmlParseChunk(parser, "\"", 1, 0);
249 }
250 php_dom_in_scope_ns_destroy(&in_scope_ns);
251 /* default namespace */
252 const char *default_ns = dom_locate_a_namespace(context_node, NULL);
253 if (default_ns != NULL) {
254 xmlParseChunk(parser, " xmlns=\"", 8, 0);
255 xmlParseChunk(parser, default_ns, strlen(default_ns), 0);
256 xmlParseChunk(parser, "\"", 1, 0);
257 }
258
259 xmlParseChunk(parser, ">", 1, 0);
260
261 xmlParseChunk(parser, (const char *) ZSTR_VAL(input), ZSTR_LEN(input), 0);
262
263 xmlParseChunk(parser, "</", 2, 0);
264 dom_xml_parser_tag_name(context_node, parser);
265 xmlParseChunk(parser, ">", 1, 1);
266}
267
268/* https://html.spec.whatwg.org/#xml-fragment-parsing-algorithm */
/* Parses input as an XML fragment in the context of context_node.
 * A push parser is created that shares context_node->doc's dictionary, the synthetic wrapper document
 * is fed in by dom_xml_fragment_parsing_algorithm_parse(), and on success the children of the parsed
 * document element are moved into a new document fragment owned by context_node->doc.
 * Returns that fragment. Returns NULL after throwing SYNTAX_ERR when the parser reports a
 * well-formedness problem or the document element is missing or has siblings. Also returns NULL when
 * the parser cannot be created, no document was produced, or xmlNewDocFragment() fails.
 * NOTE(review): listing lines 274, 293 and 327 are absent from this view. Not visible as a result:
 * what happens before `return NULL` when parser creation fails, the declaration of `ns_mapper`
 * (presumably obtained via php_dom_get_ns_mapper(obj)), and one statement between xmlAddChildList()
 * and the detaching of document_element's children. Confirm against php-src. */
269static xmlNodePtr dom_xml_fragment_parsing_algorithm(dom_object *obj, const xmlNode *context_node, const zend_string *input)
270{
271 /* Steps 1-4 below */
272 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
273 if (UNEXPECTED(parser == NULL)) {
275 return NULL;
276 }
277
278 /* This is not only good to avoid a performance cost of changing the tree, but also to work around an old bug
279 * in xmlSetTreeDoc(). */
/* Replace the parser's own dictionary with the target document's, creating the latter on demand. */
280 xmlDictFree(parser->dict);
281 if (context_node->doc->dict == NULL) {
282 context_node->doc->dict = xmlDictCreate();
283 xmlDictSetLimit(context_node->doc->dict, XML_MAX_DICTIONARY_LIMIT);
284 }
285 parser->dict = context_node->doc->dict;
286
287 php_libxml_sanitize_parse_ctxt_options(parser);
288 xmlCtxtUseOptions(parser, XML_PARSE_IGNORE_ENC | XML_PARSE_NOERROR | XML_PARSE_NOWARNING);
289
/* Force UTF-8; the result of the switch is deliberately discarded. */
290 xmlCharEncodingHandlerPtr encoding = xmlFindCharEncodingHandler("UTF-8");
291 (void) xmlSwitchToEncoding(parser, encoding);
292
294 dom_xml_fragment_parsing_algorithm_parse(ns_mapper, context_node, input, parser);
295
296 /* 5. If there is an XML well-formedness or XML namespace well-formedness error, then throw a "SyntaxError" DOMException. */
297 if (!parser->wellFormed || !parser->nsWellFormed) {
/* NOTE(review): on this path the parser drops the shared dict pointer and the temporary document keeps
 * it; on the success path below it is the other way round. Presumably this balances the dictionary's
 * reference count -- confirm against libxml2. */
298 parser->dict = NULL;
299 xmlFreeDoc(parser->myDoc);
300 xmlFreeParserCtxt(parser);
301 php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
302 return NULL;
303 }
304
/* Take the parsed document out of the parser before the parser is freed. */
305 xmlDocPtr doc = parser->myDoc;
306 xmlFreeParserCtxt(parser);
307
308 if (EXPECTED(doc != NULL)) {
/* Drop the temporary document's pointer to the shared dictionary before the document is freed. */
309 doc->dict = NULL;
310
311 /* 6. If the document element of the resulting Document has any sibling nodes, then throw a "SyntaxError" DOMException. */
312 xmlNodePtr document_element = doc->children;
313 if (document_element == NULL || document_element->next != NULL) {
314 xmlFreeDoc(doc);
315 php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
316 return NULL;
317 }
318
319 /* 7. Return the child nodes of the document element of the resulting Document, in tree order. */
320 xmlNodePtr fragment = xmlNewDocFragment(context_node->doc);
321 if (EXPECTED(fragment != NULL)) {
322 xmlNodePtr child = document_element->children;
323 /* Yes, we have to call both xmlSetTreeDoc() prior to xmlAddChildList()
324 * because xmlAddChildList() _only_ sets the tree for the topmost elements in the subtree! */
325 xmlSetTreeDoc(document_element, context_node->doc);
326 xmlAddChildList(fragment, child);
/* The children now belong to the fragment; unlink them from the wrapper so xmlFreeDoc() below does not reach them. */
328 document_element->children = NULL;
329 document_element->last = NULL;
330 }
331 xmlFreeDoc(doc);
332 return fragment;
333 }
334 return NULL;
335}
336
337/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
338 * and https://w3c.github.io/DOM-Parsing/#dfn-fragment-parsing-algorithm */
/* Replaces the children of the node behind obj with the result of parsing the string in newval.
 * The XML fragment parser is used when the owning document is an XML document, the HTML fragment
 * parser (with the document's quirks mode) otherwise. For an HTML <template> element the insertion
 * target is switched to the node returned by php_dom_ensure_templated_content().
 * Returns FAILURE when parsing yields no fragment, when the template content cannot be obtained
 * (the fragment is freed in that case), or when php_dom_pre_insert() fails; SUCCESS otherwise.
 * NOTE(review): this listing jumps from line 338 to line 340, so the signature is not visible here;
 * presumably `zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)` -- confirm. */
340{
341 DOM_PROP_NODE(xmlNodePtr, context_node, obj);
342
343 xmlNodePtr fragment;
344 if (context_node->doc->type == XML_DOCUMENT_NODE) {
345 fragment = dom_xml_fragment_parsing_algorithm(obj, context_node, Z_STR_P(newval));
346 } else {
347 fragment = dom_html_fragment_parsing_algorithm(obj, context_node, Z_STR_P(newval), obj->document->quirks_mode);
348 }
349
350 if (fragment == NULL) {
351 return FAILURE;
352 }
353
/* HTML <template>: insert into the node returned for its template content instead of the element itself. */
354 if (php_dom_ns_is_fast(context_node, php_dom_ns_is_html_magic_token) && xmlStrEqual(context_node->name, BAD_CAST "template")) {
355 context_node = php_dom_ensure_templated_content(php_dom_get_private_data(obj), context_node);
356 if (context_node == NULL) {
357 xmlFreeNode(fragment);
358 return FAILURE;
359 }
360 }
361
362 ZEND_ASSERT(obj->document != NULL);
/* The child list is about to change, so cached node lists for this document are invalidated first. */
363 php_libxml_invalidate_node_list_cache(obj->document);
364
/* Existing children are removed before the fragment is inserted (insertion point NULL). */
365 dom_remove_all_children(context_node);
366 return php_dom_pre_insert(obj->document, fragment, context_node, NULL) ? SUCCESS : FAILURE;
367}
368
369#endif
size_t len
Definition apprentice.c:174
@ LXB_STATUS_OK
Definition base.h:49
lxb_codepoint_t lxb_encoding_decode_utf_8_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2780
DNS_STATUS status
Definition dns_win32.c:49
struct lxb_dom_node lxb_dom_node_t
Definition interface.h:38
struct lxb_dom_element lxb_dom_element_t
Definition interface.h:39
lxb_dom_document_cmode_t
Definition document.h:21
@ LXB_DOM_DOCUMENT_CMODE_QUIRKS
Definition document.h:23
@ LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS
Definition document.h:24
@ LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS
Definition document.h:22
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)
#define DOM_PROP_NODE(type, name, obj)
void php_dom_throw_error_with_message(dom_exception_code error_code, const char *error_message, bool strict_error)
void php_dom_throw_error(dom_exception_code error_code, bool strict_error)
@ INVALID_STATE_ERR
@ SYNTAX_ERR
#define LXB_ENCODING_REPLACEMENT_BYTES
Definition base.h:28
@ LXB_ENCODING_REPLACEMENT_SIZE
Definition base.h:39
@ LXB_ENCODING_MAX_CODEPOINT
Definition base.h:41
struct lxb_encoding_data lxb_encoding_data_t
Definition base.h:103
@ LXB_ENCODING_UTF_8
Definition const.h:45
lxb_inline const lxb_encoding_data_t * lxb_encoding_data(lxb_encoding_t encoding)
Definition encoding.h:315
lxb_inline lxb_status_t lxb_encoding_decode_init_single(lxb_encoding_decode_t *decode, const lxb_encoding_data_t *encoding_data)
Definition encoding.h:262
zend_ffi_ctype_name_buf buf
Definition ffi.c:4685
#define NULL
Definition gdcache.h:45
#define SUCCESS
Definition hash_sha3.c:261
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(lxb_dom_node_t *start_node, xmlDocPtr lxml_doc, xmlNodePtr *fragment_out, bool compact_text_nodes, bool create_default_ns, php_dom_private_data *private_data)
lexbor_libxml2_bridge_status
@ LEXBOR_LIBXML2_BRIDGE_STATUS_OK
zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node)
struct lxb_html_document lxb_html_document_t
Definition interface.h:95
lxb_dom_element_t * lxb_dom_element_interface_create(lxb_dom_document_t *document)
Definition element.c:33
lxb_html_document_t * lxb_html_document_create(void)
Definition document.c:189
lxb_status_t lxb_html_document_parse_fragment_chunk(lxb_html_document_t *document, const lxb_char_t *html, size_t size)
Definition document.c:860
lxb_status_t lxb_html_document_parse_fragment_chunk_begin(lxb_html_document_t *document, lxb_dom_element_t *element)
Definition document.c:841
lxb_html_document_t * lxb_html_document_destroy(lxb_html_document_t *document)
Definition document.c:721
lxb_dom_node_t * lxb_html_document_parse_fragment_chunk_end(lxb_html_document_t *document)
Definition document.c:868
PHP_DOM_EXPORT const php_dom_ns_magic_token * php_dom_ns_is_html_magic_token
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements)
PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns)
PHP_DOM_EXPORT bool php_dom_ns_is_fast(const xmlNode *nodep, const php_dom_ns_magic_token *magic_token)
PHP_DOM_EXPORT php_dom_libxml_ns_mapper * php_dom_get_ns_mapper(dom_object *object)
@ LXB_NS__UNDEF
Definition const.h:24
const lxb_ns_data_t * lxb_ns_data_by_link(lexbor_hash_t *hash, const lxb_char_t *link, size_t length)
Definition ns.c:78
bool php_dom_pre_insert(php_libxml_ref_obj *document, xmlNodePtr node, xmlNodePtr parent, xmlNodePtr insertion_point)
const char * dom_locate_a_namespace(const xmlNode *node, const zend_string *prefix)
void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc)
void dom_remove_all_children(xmlNodePtr nodep)
const XML_HTML_DOCUMENT_NODE
const XML_DOCUMENT_NODE
xmlCharEncodingHandlerPtr encoding
Definition php_soap.h:170
xmlNodePtr php_dom_ensure_templated_content(php_dom_private_data *private_data, xmlNodePtr template_node)
php_libxml_ref_obj * document
Definition xml_common.h:27
Definition file.h:177
Definition dce.c:49
zend_result(* write_string_len)(void *application_data, const char *buf, size_t len)
php_dom_private_data * private_data
zend_result(* write_string)(void *application_data, const char *buf)
lexbor_hash_t * ns
Definition document.h:58
lxb_dom_document_cmode_t compat_mode
Definition document.h:38
lexbor_hash_t * tags
Definition document.h:55
lxb_dom_node_t node
Definition element.h:33
uintptr_t ns
Definition node.h:48
lxb_dom_node_t * last_child
Definition node.h:56
uintptr_t local_name
Definition node.h:46
lxb_encoding_ctx_utf_8_t utf_8
Definition base.h:129
union lxb_encoding_decode_t::@302274252113053227061303304053361346350151303155 u
lxb_dom_document_t dom_document
Definition document.h:58
lxb_ns_id_t ns_id
Definition ns.h:23
lxb_tag_id_t tag_id
Definition tag.h:24
@ LXB_TAG__UNDEF
Definition const.h:24
const lxb_tag_data_t * lxb_tag_data_by_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t len)
Definition tag.c:77
unsigned int lxb_status_t
Definition types.h:28
unsigned char lxb_char_t
Definition types.h:27
uint32_t lxb_codepoint_t
Definition types.h:26
struct _dom_object dom_object
int dom_xml_serialize(xmlSaveCtxtPtr ctx, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed, php_dom_private_data *private_data)
struct _zval_struct zval
strlen(string $string)
ZEND_API void(ZEND_FASTCALL *zend_touch_vm_stack_data)(void *vm_stack_data)
struct _zend_string zend_string
#define EXPECTED(condition)
#define ZEND_ASSERT(c)
#define EMPTY_SWITCH_DEFAULT_CASE()
#define UNEXPECTED(condition)
#define ZSTR_IS_VALID_UTF8(s)
Definition zend_string.h:85
#define ZSTR_VAL(zstr)
Definition zend_string.h:68
#define ZSTR_LEN(zstr)
Definition zend_string.h:69
#define ZVAL_STR(z, s)
#define Z_STR_P(zval_p)
Definition zend_types.h:972
@ FAILURE
Definition zend_types.h:61
ZEND_RESULT_CODE zend_result
Definition zend_types.h:64
zval retval
fbc internal_function handler(call, ret)
out($f, $s)