php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
xml_document.c
Go to the documentation of this file.
1/*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Niels Dossche <nielsdos@php.net> |
14 +----------------------------------------------------------------------+
15*/
16
17#ifdef HAVE_CONFIG_H
18#include <config.h>
19#endif
20
21#include "php.h"
22#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
23#include "php_dom.h"
24#include "namespace_compat.h"
25#include "private_data.h"
26#include "xml_serializer.h"
27#include <libxml/xmlsave.h>
28
29static bool check_options_validity(uint32_t arg_num, zend_long options)
30{
31 const zend_long VALID_OPTIONS = XML_PARSE_RECOVER
32 | XML_PARSE_NOENT
33#if LIBXML_VERSION >= 21300
34 | XML_PARSE_NO_XXE
35#endif
36 | XML_PARSE_DTDLOAD
37 | XML_PARSE_DTDATTR
38 | XML_PARSE_DTDVALID
39 | XML_PARSE_NOERROR
40 | XML_PARSE_NOWARNING
41 | XML_PARSE_NOBLANKS
42 | XML_PARSE_XINCLUDE
43 | XML_PARSE_NSCLEAN
44 | XML_PARSE_NOCDATA
45 | XML_PARSE_NONET
46 | XML_PARSE_PEDANTIC
47 | XML_PARSE_COMPACT
48 | XML_PARSE_HUGE
49 | XML_PARSE_BIG_LINES;
50 if ((options & ~VALID_OPTIONS) != 0) {
51 zend_argument_value_error(arg_num, "contains invalid flags (allowed flags: "
52 "LIBXML_RECOVER, "
53 "LIBXML_NOENT, "
54#if LIBXML_VERSION >= 21300
55 "LIBXML_NO_XXE, "
56#endif
57 "LIBXML_DTDLOAD, "
58 "LIBXML_DTDATTR, "
59 "LIBXML_DTDVALID, "
60 "LIBXML_NOERROR, "
61 "LIBXML_NOWARNING, "
62 "LIBXML_NOBLANKS, "
63 "LIBXML_XINCLUDE, "
64 "LIBXML_NSCLEAN, "
65 "LIBXML_NOCDATA, "
66 "LIBXML_NONET, "
67 "LIBXML_PEDANTIC, "
68 "LIBXML_COMPACT, "
69 "LIBXML_PARSEHUGE, "
70 "LIBXML_BIGLINES)");
71 return false;
72 }
73 return true;
74}
75
76/* Living spec never creates explicit namespace declaration nodes.
77 * They are only written upon serialization but never appear in the tree.
78 * So in principle we could just ignore them outright.
79 * However, step 10 in https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token (Date 2023-12-15)
80 * requires us to have the declaration as an attribute available */
82{
83 xmlNodePtr node = doc->children;
84 while (node != NULL) {
85 if (node->type == XML_ELEMENT_NODE) {
87 }
88
89 node = php_dom_next_in_tree_order(node, NULL);
90 }
91}
92
93PHP_METHOD(Dom_XMLDocument, createEmpty)
94{
95 const char *version = NULL;
96 size_t encoding_len = strlen("UTF-8");
97 const char *encoding = "UTF-8";
98 size_t version_len;
99 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|sp", &version, &version_len, &encoding, &encoding_len) == FAILURE) {
101 }
102
103 xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
104
105 if (handler != NULL) {
106 xmlCharEncCloseFunc(handler);
107 } else {
108 zend_argument_value_error(2, "is not a valid document encoding");
110 }
111
112 xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) version);
113 if (UNEXPECTED(lxml_doc == NULL)) {
114 goto oom;
115 }
116
117 lxml_doc->encoding = xmlStrdup((const xmlChar *) encoding);
118
122 (xmlNodePtr) lxml_doc,
123 NULL
124 );
127 return;
128
129oom:
132}
133
134static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
135{
136 const char *source, *override_encoding = NULL;
137 size_t source_len, override_encoding_len;
138 zend_long options = 0;
141 "s|lp!",
142 &source,
143 &source_len,
144 &options,
145 &override_encoding,
146 &override_encoding_len
147 ) == FAILURE) {
149 }
150
151 if (!source_len) {
152 zend_argument_value_error(1, "must not be empty");
154 }
155
156 if (ZEND_SIZE_T_INT_OVFL(source_len)) {
157 zend_argument_value_error(1, "is too long");
159 }
160
161 /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
162 if (mode == DOM_LOAD_FILE && strstr(source, "%00")) {
163 zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
165 }
166
167 if (!check_options_validity(2, options)) {
169 }
170
171 xmlCharEncodingHandlerPtr encoding = NULL;
172 if (override_encoding != NULL) {
173 encoding = xmlFindCharEncodingHandler(override_encoding);
174 if (!encoding) {
175 zend_argument_value_error(3, "must be a valid document encoding");
177 }
178 options |= XML_PARSE_IGNORE_ENC;
179 }
180
181 xmlDocPtr lxml_doc = dom_document_parser(NULL, mode, source, source_len, options, encoding);
182 if (UNEXPECTED(lxml_doc == NULL || lxml_doc == DOM_DOCUMENT_MALFORMED)) {
183 if (!EG(exception)) {
184 if (lxml_doc == DOM_DOCUMENT_MALFORMED) {
185 php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
186 } else {
187 if (mode == DOM_LOAD_FILE) {
188 zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source);
189 } else {
191 }
192 }
193 }
195 }
196 if (lxml_doc->encoding == NULL) {
197 if (override_encoding) {
198 lxml_doc->encoding = xmlStrdup((const xmlChar *) override_encoding);
199 } else {
200 lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
201 }
202 }
203 if (mode == DOM_LOAD_FILE && lxml_doc->URL != NULL) {
204 if (!php_is_stream_path((char *) lxml_doc->URL)) {
205 /* Check for "file:/" instead of "file://" because of libxml2 quirk */
206 if (strncmp((const char *) lxml_doc->URL, "file:/", sizeof("file:/") - 1) != 0) {
207#ifdef PHP_WIN32
208 xmlChar *buffer = xmlStrdup((const xmlChar *) "file:///");
209#else
210 xmlChar *buffer = xmlStrdup((const xmlChar *) "file://");
211#endif
212 if (buffer != NULL) {
213 xmlChar *new_buffer = xmlStrcat(buffer, lxml_doc->URL);
214 if (new_buffer != NULL) {
215 xmlFree(BAD_CAST lxml_doc->URL);
216 lxml_doc->URL = new_buffer;
217 } else {
218 xmlFree(buffer);
219 }
220 }
221 } else {
222#ifdef PHP_WIN32
223 lxml_doc->URL = php_dom_libxml_fix_file_path(BAD_CAST lxml_doc->URL);
224#endif
225 }
226 }
227 }
231 (xmlNodePtr) lxml_doc,
232 NULL
233 );
235 dom_document_convert_to_modern(intern->document, lxml_doc);
236}
237
238void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml_doc)
239{
242 document->private_data = php_dom_libxml_private_data_header(private_data);
243 dom_mark_namespaces_as_attributes_too(ns_mapper, lxml_doc);
244}
245
246PHP_METHOD(Dom_XMLDocument, createFromString)
247{
249}
250
251PHP_METHOD(Dom_XMLDocument, createFromFile)
252{
254}
255
256static int php_new_dom_write_smart_str(void *context, const char *buffer, int len)
257{
258 smart_str *str = context;
259 smart_str_appendl(str, buffer, len);
260 return len;
261}
262
263static php_dom_private_data *get_private_data_from_node(xmlNodePtr node)
264{
265 dom_object *intern = php_dom_object_get_data(node);
266 return intern != NULL ? php_dom_get_private_data(intern) : NULL;
267}
268
269static zend_string *php_new_dom_dump_node_to_str_ex(xmlNodePtr node, int options, bool format, const char *encoding)
270{
271 smart_str str = {0};
272
273 int status = -1;
274 xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_new_dom_write_smart_str, NULL, &str, encoding, XML_SAVE_AS_XML | options);
275 if (EXPECTED(ctxt != NULL)) {
276 xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
277 xmlOutputBufferPtr out = xmlOutputBufferCreateIO(php_new_dom_write_smart_str, NULL, &str, handler);
278 if (EXPECTED(out != NULL)) {
279 status = dom_xml_serialize(ctxt, out, node, format, false, get_private_data_from_node(node));
280 status |= xmlOutputBufferFlush(out);
281 status |= xmlOutputBufferClose(out);
282 } else {
283 xmlCharEncCloseFunc(handler);
284 }
285 (void) xmlSaveClose(ctxt);
286 }
287
288 if (UNEXPECTED(status < 0)) {
289 smart_str_free_ex(&str, false);
290 return NULL;
291 }
292
293 return smart_str_extract(&str);
294}
295
296static zend_string *php_new_dom_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
297{
298 return php_new_dom_dump_node_to_str_ex(node, 0, format, encoding);
299}
300
301static zend_string *php_new_dom_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding)
302{
303 return php_new_dom_dump_node_to_str_ex((xmlNodePtr) doc, options, options & XML_SAVE_FORMAT, encoding);
304}
305
306zend_long php_new_dom_dump_node_to_file(const char *filename, xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
307{
308 xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
309 xmlOutputBufferPtr out = xmlOutputBufferCreateFilename(filename, handler, 0);
310 if (!out) {
311 xmlCharEncCloseFunc(handler);
312 return -1;
313 }
314
315 php_stream *stream = out->context;
316
317 int status = -1;
318 xmlSaveCtxtPtr ctxt = xmlSaveToIO(out->writecallback, NULL, stream, encoding, XML_SAVE_AS_XML);
319 if (EXPECTED(ctxt != NULL)) {
320 status = dom_xml_serialize(ctxt, out, node, format, false, get_private_data_from_node(node));
321 status |= xmlOutputBufferFlush(out);
322 (void) xmlSaveClose(ctxt);
323 }
324
325 size_t offset = php_stream_tell(stream);
326
327 (void) xmlOutputBufferClose(out);
328
329 return status < 0 ? status : (zend_long) offset;
330}
331
332static zend_long php_new_dom_dump_doc_to_file(const char *filename, xmlDocPtr doc, bool format, const char *encoding)
333{
334 return php_new_dom_dump_node_to_file(filename, doc, (xmlNodePtr) doc, format, encoding);
335}
336
337static const php_libxml_document_handlers php_new_dom_default_document_handlers = {
338 .dump_node_to_str = php_new_dom_dump_node_to_str,
339 .dump_doc_to_str = php_new_dom_dump_doc_to_str,
340 .dump_node_to_file = php_new_dom_dump_node_to_file,
341 .dump_doc_to_file = php_new_dom_dump_doc_to_file,
342};
343
344void dom_set_xml_class(php_libxml_ref_obj *document)
345{
346 document->class_type = PHP_LIBXML_CLASS_MODERN;
347 document->handlers = &php_new_dom_default_document_handlers;
348}
349
350#endif /* HAVE_LIBXML && HAVE_DOM */
size_t len
Definition apprentice.c:174
bool exception
Definition assert.c:30
strstr(string $haystack, string $needle, bool $before_needle=false)
DNS_STATUS status
Definition dns_win32.c:49
PHP_DOM_EXPORT zend_class_entry * dom_xml_document_class_entry
void php_dom_throw_error_with_message(dom_exception_code error_code, const char *error_message, bool strict_error)
void php_dom_throw_error(dom_exception_code error_code, bool strict_error)
@ INVALID_STATE_ERR
@ SYNTAX_ERR
zend_long offset
char * mode
#define NULL
Definition gdcache.h:45
const LIBXML_VERSION
PHP_DOM_EXPORT void php_dom_ns_compat_mark_attribute_list(php_dom_libxml_ns_mapper *mapper, xmlNodePtr node)
#define PHP_METHOD
Definition php.h:365
dom_object * php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent)
@ DOM_LOAD_FILE
Definition php_dom.h:193
@ DOM_LOAD_STRING
Definition php_dom.h:192
#define DOM_DOCUMENT_MALFORMED
Definition php_dom.h:196
void dom_set_xml_class(php_libxml_ref_obj *document)
xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding)
void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml_doc)
void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc)
xmlChar * php_dom_libxml_fix_file_path(xmlChar *path)
const XML_ELEMENT_NODE
PHP_JSON_API size_t int options
Definition php_json.h:102
xmlCharEncodingHandlerPtr encoding
Definition php_soap.h:170
struct _php_stream php_stream
Definition php_streams.h:96
#define php_stream_tell(stream)
php_dom_private_data * php_dom_private_data_create(void)
php_dom_libxml_ns_mapper * php_dom_ns_mapper_from_private(php_dom_private_data *private_data)
php_libxml_private_data_header * php_dom_libxml_private_data_header(php_dom_private_data *private_data)
php_libxml_ref_obj * document
Definition xml_common.h:27
Definition file.h:177
Definition dce.c:49
struct _dom_object dom_object
PHP_DOM_EXPORT dom_object * php_dom_object_get_data(xmlNodePtr obj)
int dom_xml_serialize(xmlSaveCtxtPtr ctx, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed, php_dom_private_data *private_data)
#define INTERNAL_FUNCTION_PARAMETERS
Definition zend.h:49
#define INTERNAL_FUNCTION_PARAM_PASSTHRU
Definition zend.h:50
ZEND_API zend_result zend_parse_parameters(uint32_t num_args, const char *type_spec,...)
Definition zend_API.c:1300
ZEND_API ZEND_COLD void zend_argument_value_error(uint32_t arg_num, const char *format,...)
Definition zend_API.c:433
#define ZEND_NUM_ARGS()
Definition zend_API.h:530
#define RETURN_THROWS()
Definition zend_API.h:1060
strlen(string $string)
strncmp(string $string1, string $string2, int $length)
ZEND_API ZEND_COLD zend_object * zend_throw_exception_ex(zend_class_entry *exception_ce, zend_long code, const char *format,...)
ZEND_API void(ZEND_FASTCALL *zend_touch_vm_stack_data)(void *vm_stack_data)
#define EG(v)
int32_t zend_long
Definition zend_long.h:42
struct _zend_string zend_string
#define EXPECTED(condition)
#define UNEXPECTED(condition)
#define ZEND_SIZE_T_INT_OVFL(size)
@ FAILURE
Definition zend_types.h:61
zval * return_value
uint32_t arg_num
fbc internal_function handler(call, ret)
out($f, $s)