php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
parser.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2021 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
8#include "lexbor/html/node.h"
15
16#define LXB_HTML_TAG_RES_DATA
17#define LXB_HTML_TAG_RES_SHS_DATA
18#include "lexbor/html/tag_res.h"
19
20
21static void
22lxb_html_parse_fragment_chunk_destroy(lxb_html_parser_t *parser);
23
24
27{
28 return lexbor_calloc(1, sizeof(lxb_html_parser_t));
29}
30
33{
34 if (parser == NULL) {
36 }
37
38 /* Tokenizer */
41
42 if (status != LXB_STATUS_OK) {
43 return status;
44 }
45
46 /* Tree */
47 parser->tree = lxb_html_tree_create();
48 status = lxb_html_tree_init(parser->tree, parser->tkz);
49
50 if (status != LXB_STATUS_OK) {
51 return status;
52 }
53
54 parser->original_tree = NULL;
55 parser->form = NULL;
56 parser->root = NULL;
57
59
60 parser->ref_count = 1;
61
62 return LXB_STATUS_OK;
63}
64
65void
67{
68 parser->original_tree = NULL;
69 parser->form = NULL;
70 parser->root = NULL;
71
73
76}
77
80{
81 if (parser == NULL) {
82 return NULL;
83 }
84
85 parser->tkz = lxb_html_tokenizer_unref(parser->tkz);
86 parser->tree = lxb_html_tree_unref(parser->tree);
87
88 return lexbor_free(parser);
89}
90
93{
94 if (parser == NULL) {
95 return NULL;
96 }
97
98 parser->ref_count++;
99
100 return parser;
101}
102
105{
106 if (parser == NULL || parser->ref_count == 0) {
107 return NULL;
108 }
109
110 parser->ref_count--;
111
112 if (parser->ref_count == 0) {
114 }
115
116 return NULL;
117}
118
119
/*
 * Parse the buffer `html` of `size` bytes with `parser` in a single call
 * and return the resulting document.
 *
 * Returns NULL when `document` is NULL, or when parser->status is not
 * LXB_STATUS_OK after either of the two processing steps below.
 *
 * NOTE(review): listing lines 120, 123, 133 and 142 are absent from this
 * extract (return type, the assignment of `document`, the call preceding
 * the second status check, and the statement under `failed:`). Confirm
 * them against the original file before relying on this description.
 */
121lxb_html_parse(lxb_html_parser_t *parser, const lxb_char_t *html, size_t size)
122{
/* `document` is assigned on a line missing from this extract;
 * presumably by lxb_html_parse_chunk_begin(parser) -- TODO confirm. */
124 if (document == NULL) {
125 return NULL;
126 }
127
/* Feed the whole input as one chunk; the result is read from parser->status. */
128 lxb_html_parse_chunk_process(parser, html, size);
129 if (parser->status != LXB_STATUS_OK) {
130 goto failed;
131 }
132
/* Second status check follows a call missing from this extract;
 * presumably lxb_html_parse_chunk_end(parser) -- TODO confirm. */
134 if (parser->status != LXB_STATUS_OK) {
135 goto failed;
136 }
137
138 return document;
139
140failed:
141
/* A statement is missing here in this extract; presumably it releases
 * `document` before reporting failure -- TODO confirm. */
143
144 return NULL;
145}
146
149 const lxb_char_t *html, size_t size)
150{
152 parser->tree->document,
153 element->element.node.local_name,
154 element->element.node.ns,
155 html, size);
156}
157
160 lxb_html_document_t *document,
161 lxb_tag_id_t tag_id, lxb_ns_id_t ns,
162 const lxb_char_t *html, size_t size)
163{
164 lxb_html_parse_fragment_chunk_begin(parser, document, tag_id, ns);
165 if (parser->status != LXB_STATUS_OK) {
166 return NULL;
167 }
168
170 if (parser->status != LXB_STATUS_OK) {
171 return NULL;
172 }
173
175}
176
179 lxb_html_document_t *document,
180 lxb_tag_id_t tag_id, lxb_ns_id_t ns)
181{
183 lxb_html_document_t *new_doc;
184
185 if (parser->state != LXB_HTML_PARSER_STATE_BEGIN) {
186 lxb_html_parser_clean(parser);
187 }
188
190
191 new_doc = lxb_html_document_interface_create(document);
192 if (new_doc == NULL) {
194 return parser->status;
195 }
196
197 doc = lxb_dom_interface_document(new_doc);
198
199 if (document == NULL) {
200 doc->scripting = parser->tree->scripting;
202 }
203
204 lxb_html_tokenizer_set_state_by_tag(parser->tkz, doc->scripting, tag_id, ns);
206
208 if (parser->root == NULL) {
210
211 goto done;
212 }
213
215 parser->root);
217
218 parser->tree->fragment = lxb_html_interface_create(new_doc, tag_id, ns);
219 if (parser->tree->fragment == NULL) {
221
222 goto done;
223 }
224
225 /* Contains just the single element root */
226 parser->status = lxb_html_tree_open_elements_push(parser->tree, parser->root);
227 if (parser->status != LXB_STATUS_OK) {
228 goto done;
229 }
230
231 if (tag_id == LXB_TAG_TEMPLATE && ns == LXB_NS_HTML) {
234 if (parser->status != LXB_STATUS_OK) {
235 goto done;
236 }
237 }
238
239 lxb_html_tree_attach_document(parser->tree, new_doc);
241
242 if (tag_id == LXB_TAG_FORM && ns == LXB_NS_HTML) {
243 parser->form = lxb_html_interface_create(new_doc,
245 if (parser->form == NULL) {
247
248 goto done;
249 }
250
251 parser->tree->form = lxb_html_interface_form(parser->form);
252 }
253
254 parser->original_tree = lxb_html_tokenizer_tree(parser->tkz);
255 lxb_html_tokenizer_tree_set(parser->tkz, parser->tree);
256
257 lxb_html_tokenizer_tags_set(parser->tkz, doc->tags);
260
261 parser->status = lxb_html_tree_begin(parser->tree, new_doc);
262
263done:
264
265 if (parser->status != LXB_STATUS_OK) {
266 if (parser->root != NULL) {
268 }
269
271 parser->root = NULL;
272
273 lxb_html_parse_fragment_chunk_destroy(parser);
274 }
275
276 return parser->status;
277}
278
281 const lxb_char_t *html, size_t size)
282{
285 }
286
287 parser->status = lxb_html_tree_chunk(parser->tree, html, size);
288 if (parser->status != LXB_STATUS_OK) {
290
292 parser->root = NULL;
293
294 lxb_html_parse_fragment_chunk_destroy(parser);
295 }
296
297 return parser->status;
298}
299
302{
305
306 return NULL;
307 }
308
309 parser->status = lxb_html_tree_end(parser->tree);
310 if (parser->status != LXB_STATUS_OK) {
312
313 parser->root = NULL;
314 }
315
316 lxb_html_parse_fragment_chunk_destroy(parser);
317
319
321
322 return parser->root;
323}
324
/*
 * Tear down the per-fragment state held by `parser`: reset parser->form,
 * parser->tree->fragment and, when the tree's document is not an original
 * document, parser->tree->document.
 *
 * NOTE(review): listing lines 328, 331, 337 and 348 are absent from this
 * extract (the declaration of `doc` and, presumably, the calls that destroy
 * the form, fragment and document objects). Confirm against the original.
 */
325static void
326lxb_html_parse_fragment_chunk_destroy(lxb_html_parser_t *parser)
327{
329
330 if (parser->form != NULL) {
/* Missing line here; presumably destroys parser->form -- TODO confirm. */
332
333 parser->form = NULL;
334 }
335
336 if (parser->tree->fragment != NULL) {
/* Missing line here; presumably destroys parser->tree->fragment -- TODO confirm. */
338
339 parser->tree->fragment = NULL;
340 }
341
/* Only a document that is not "original" is detached from the tree here. */
342 if (lxb_html_document_is_original(parser->tree->document) == false) {
343 if (parser->root != NULL) {
/* Re-parent the fragment root onto the node of the owner document
 * of the tree's document before that document is dropped. */
344 doc = lxb_dom_interface_node(parser->tree->document)->owner_document;
345 parser->root->parent = &doc->node;
346 }
347
/* Missing line here; presumably destroys parser->tree->document -- TODO confirm. */
349
350 parser->tree->document = NULL;
351 }
352}
353
356 lxb_html_document_t *document)
357{
359
360 parser->original_tree = lxb_html_tokenizer_tree(parser->tkz);
361 lxb_html_tokenizer_tree_set(parser->tkz, parser->tree);
362
366
367 parser->status = lxb_html_tree_begin(parser->tree, document);
368 if (parser->status != LXB_STATUS_OK) {
370 }
371
372 return parser->status;
373}
374
377{
378 lxb_html_document_t *document;
379
380 if (parser->state != LXB_HTML_PARSER_STATE_BEGIN) {
381 lxb_html_parser_clean(parser);
382 }
383
385 if (document == NULL) {
388
389 return lxb_html_document_destroy(document);
390 }
391
392 document->dom_document.scripting = parser->tree->scripting;
393
394 parser->status = lxb_html_parse_chunk_prepare(parser, document);
395 if (parser->status != LXB_STATUS_OK) {
396 return lxb_html_document_destroy(document);
397 }
398
399 return document;
400}
401
404 const lxb_char_t *html, size_t size)
405{
406 if (parser->state != LXB_HTML_PARSER_STATE_PROCESS) {
408 }
409
410 parser->status = lxb_html_tree_chunk(parser->tree, html, size);
411 if (parser->status != LXB_STATUS_OK) {
413 }
414
415 return parser->status;
416}
417
420{
421 if (parser->state != LXB_HTML_PARSER_STATE_PROCESS) {
423 }
424
425 parser->status = lxb_html_tree_end(parser->tree);
426
428
430
431 return parser->status;
432}
433
434/*
435 * Non-inline (_noi) wrappers for the inline parser accessors, exported for ABI.
436 */
442
448
454
460
461bool
466
467void
469{
470 lxb_html_parser_scripting_set(parser, scripting);
471}
@ LXB_STATUS_ERROR_MEMORY_ALLOCATION
Definition base.h:51
@ LXB_STATUS_ERROR_OBJECT_IS_NULL
Definition base.h:52
@ LXB_STATUS_ERROR_WRONG_STAGE
Definition base.h:59
@ LXB_STATUS_OK
Definition base.h:49
#define LXB_API
Definition def.h:48
DNS_STATUS status
Definition dns_win32.c:49
struct lxb_dom_document lxb_dom_document_t
Definition interface.h:41
#define lxb_dom_interface_element(obj)
Definition interface.h:28
#define lxb_dom_interface_node(obj)
Definition interface.h:31
#define lxb_dom_interface_document(obj)
Definition interface.h:25
struct lxb_dom_node lxb_dom_node_t
Definition interface.h:38
@ LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS
Definition document.h:22
lxb_dom_interface_t * lxb_html_interface_destroy(lxb_dom_interface_t *intrfc)
Definition interface.c:120
lxb_dom_interface_t * lxb_html_interface_create(lxb_html_document_t *document, lxb_tag_id_t tag_id, lxb_ns_id_t ns)
Definition interface.c:25
lxb_status_t lxb_html_tokenizer_init(lxb_html_tokenizer_t *tkz)
Definition tokenizer.c:45
lxb_html_tokenizer_t * lxb_html_tokenizer_unref(lxb_html_tokenizer_t *tkz)
Definition tokenizer.c:179
lxb_html_tokenizer_t * lxb_html_tokenizer_create(void)
Definition tokenizer.c:39
void lxb_html_tokenizer_set_state_by_tag(lxb_html_tokenizer_t *tkz, bool scripting, lxb_tag_id_t tag_id, lxb_ns_id_t ns)
Definition tokenizer.c:405
void lxb_html_tokenizer_clean(lxb_html_tokenizer_t *tkz)
Definition tokenizer.c:199
new_type size
Definition ffi.c:4365
lxb_html_form_element_t * lxb_html_form_element_interface_destroy(lxb_html_form_element_t *form_element)
#define NULL
Definition gdcache.h:45
struct lxb_html_tokenizer lxb_html_tokenizer_t
Definition base.h:26
struct lxb_html_tree lxb_html_tree_t
Definition base.h:28
#define lxb_html_interface_html(obj)
Definition interface.h:46
#define lxb_html_interface_form(obj)
Definition interface.h:40
struct lxb_html_element lxb_html_element_t
Definition interface.h:111
struct lxb_html_document lxb_html_document_t
Definition interface.h:95
lxb_inline bool lxb_html_document_is_original(lxb_html_document_t *document)
Definition document.h:230
lxb_status_t lxb_html_parse_fragment_chunk_begin(lxb_html_parser_t *parser, lxb_html_document_t *document, lxb_tag_id_t tag_id, lxb_ns_id_t ns)
Definition parser.c:178
lxb_html_tokenizer_t * lxb_html_parser_tokenizer_noi(lxb_html_parser_t *parser)
Definition parser.c:438
lxb_dom_node_t * lxb_html_parse_fragment_chunk_end(lxb_html_parser_t *parser)
Definition parser.c:301
lxb_dom_node_t * lxb_html_parse_fragment(lxb_html_parser_t *parser, lxb_html_element_t *element, const lxb_char_t *html, size_t size)
Definition parser.c:148
lxb_html_parser_t * lxb_html_parser_ref(lxb_html_parser_t *parser)
Definition parser.c:92
lxb_html_parser_t * lxb_html_parser_destroy(lxb_html_parser_t *parser)
Definition parser.c:79
lxb_status_t lxb_html_parse_chunk_end(lxb_html_parser_t *parser)
Definition parser.c:419
lxb_dom_node_t * lxb_html_parse_fragment_by_tag_id(lxb_html_parser_t *parser, lxb_html_document_t *document, lxb_tag_id_t tag_id, lxb_ns_id_t ns, const lxb_char_t *html, size_t size)
Definition parser.c:159
lxb_status_t lxb_html_parse_chunk_process(lxb_html_parser_t *parser, const lxb_char_t *html, size_t size)
Definition parser.c:403
lxb_html_document_t * lxb_html_parse_chunk_begin(lxb_html_parser_t *parser)
Definition parser.c:376
LXB_API lxb_status_t lxb_html_parse_chunk_prepare(lxb_html_parser_t *parser, lxb_html_document_t *document)
Definition parser.c:355
void lxb_html_parser_clean(lxb_html_parser_t *parser)
Definition parser.c:66
lxb_status_t lxb_html_parser_state_noi(lxb_html_parser_t *parser)
Definition parser.c:456
lxb_html_document_t * lxb_html_parse(lxb_html_parser_t *parser, const lxb_char_t *html, size_t size)
Definition parser.c:121
lxb_status_t lxb_html_parser_init(lxb_html_parser_t *parser)
Definition parser.c:32
lxb_html_parser_t * lxb_html_parser_unref(lxb_html_parser_t *parser)
Definition parser.c:104
lxb_status_t lxb_html_parser_status_noi(lxb_html_parser_t *parser)
Definition parser.c:450
bool lxb_html_parser_scripting_noi(lxb_html_parser_t *parser)
Definition parser.c:462
lxb_status_t lxb_html_parse_fragment_chunk_process(lxb_html_parser_t *parser, const lxb_char_t *html, size_t size)
Definition parser.c:280
lxb_html_parser_t * lxb_html_parser_create(void)
Definition parser.c:26
lxb_html_tree_t * lxb_html_parser_tree_noi(lxb_html_parser_t *parser)
Definition parser.c:444
void lxb_html_parser_scripting_set_noi(lxb_html_parser_t *parser, bool scripting)
Definition parser.c:468
lxb_inline lxb_status_t lxb_html_parser_status(lxb_html_parser_t *parser)
Definition parser.h:120
lxb_inline lxb_html_tokenizer_t * lxb_html_parser_tokenizer(lxb_html_parser_t *parser)
Definition parser.h:108
lxb_inline bool lxb_html_parser_scripting(lxb_html_parser_t *parser)
Definition parser.h:132
lxb_inline lxb_html_tree_t * lxb_html_parser_tree(lxb_html_parser_t *parser)
Definition parser.h:114
@ LXB_HTML_PARSER_STATE_ERROR
Definition parser.h:26
@ LXB_HTML_PARSER_STATE_BEGIN
Definition parser.h:22
@ LXB_HTML_PARSER_STATE_END
Definition parser.h:24
@ LXB_HTML_PARSER_STATE_FRAGMENT_PROCESS
Definition parser.h:25
@ LXB_HTML_PARSER_STATE_PROCESS
Definition parser.h:23
lxb_inline void lxb_html_parser_scripting_set(lxb_html_parser_t *parser, bool scripting)
Definition parser.h:138
lxb_inline lxb_status_t lxb_html_parser_state(lxb_html_parser_t *parser)
Definition parser.h:126
lxb_inline void lxb_html_tokenizer_attrs_set(lxb_html_tokenizer_t *tkz, lexbor_hash_t *attrs)
Definition tokenizer.h:182
lxb_inline void lxb_html_tokenizer_tree_set(lxb_html_tokenizer_t *tkz, lxb_html_tree_t *tree)
Definition tokenizer.h:242
lxb_inline void lxb_html_tokenizer_tmp_tag_id_set(lxb_html_tokenizer_t *tkz, lxb_tag_id_t tag_id)
Definition tokenizer.h:229
lxb_inline lxb_html_tree_t * lxb_html_tokenizer_tree(lxb_html_tokenizer_t *tkz)
Definition tokenizer.h:236
lxb_inline void lxb_html_tokenizer_attrs_mraw_set(lxb_html_tokenizer_t *tkz, lexbor_mraw_t *mraw)
Definition tokenizer.h:194
lxb_inline void lxb_html_tokenizer_tags_set(lxb_html_tokenizer_t *tkz, lexbor_hash_t *tags)
Definition tokenizer.h:170
lxb_html_html_element_t * lxb_html_html_element_interface_destroy(lxb_html_html_element_t *html_element)
LXB_API bool lxb_html_tree_insertion_mode_in_template(lxb_html_tree_t *tree, lxb_html_token_t *token)
void lxb_dom_document_attach_element(lxb_dom_document_t *document, lxb_dom_element_t *element)
Definition document.c:231
void lxb_dom_node_insert_child_wo_events(lxb_dom_node_t *to, lxb_dom_node_t *node)
Definition node.c:368
lxb_html_document_t * lxb_html_document_interface_create(lxb_html_document_t *document)
Definition document.c:122
lxb_html_document_t * lxb_html_document_destroy(lxb_html_document_t *document)
Definition document.c:721
lxb_html_document_t * lxb_html_document_interface_destroy(lxb_html_document_t *document)
Definition document.c:166
void lxb_html_tree_clean(lxb_html_tree_t *tree)
Definition tree.c:156
lxb_html_tree_t * lxb_html_tree_create(void)
Definition tree.c:47
lxb_html_tree_t * lxb_html_tree_unref(lxb_html_tree_t *tree)
Definition tree.c:140
lxb_status_t lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz)
Definition tree.c:53
void lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree)
Definition tree.c:999
LXB_API void * lexbor_free(void *dst)
Definition memory.c:33
LXB_API void * lexbor_calloc(size_t num, size_t size)
Definition memory.c:27
uintptr_t lxb_ns_id_t
Definition const.h:20
@ LXB_NS_HTML
Definition const.h:26
lxb_inline lxb_status_t lxb_html_tree_open_elements_push(lxb_html_tree_t *tree, lxb_dom_node_t *node)
lxb_dom_node_t node
Definition document.h:36
lxb_dom_document_cmode_t compat_mode
Definition document.h:38
lexbor_hash_t * tags
Definition document.h:55
lexbor_mraw_t * text
Definition document.h:54
lexbor_hash_t * attrs
Definition document.h:56
lxb_dom_node_t node
Definition element.h:33
uintptr_t ns
Definition node.h:48
uintptr_t local_name
Definition node.h:46
lxb_dom_node_t * parent
Definition node.h:54
lxb_dom_document_t dom_document
Definition document.h:58
lxb_dom_element_t element
Definition element.h:23
lxb_html_tree_t * original_tree
Definition parser.h:33
lxb_html_parser_state_t state
Definition parser.h:38
lxb_dom_node_t * root
Definition parser.h:35
lxb_html_tokenizer_t * tkz
Definition parser.h:31
lxb_html_tree_t * tree
Definition parser.h:32
size_t ref_count
Definition parser.h:41
lxb_dom_node_t * form
Definition parser.h:36
lxb_status_t status
Definition parser.h:39
bool scripting
Definition tree.h:57
lxb_html_form_element_t * form
Definition tree.h:45
lxb_html_document_t * document
Definition tree.h:42
lxb_dom_node_t * fragment
Definition tree.h:43
@ LXB_TAG_TEMPLATE
Definition const.h:203
@ LXB_TAG_HTML
Definition const.h:125
@ LXB_TAG_FORM
Definition const.h:111
@ LXB_TAG__UNDEF
Definition const.h:24
uintptr_t lxb_tag_id_t
Definition const.h:21
lxb_inline lxb_status_t lxb_html_tree_template_insertion_push(lxb_html_tree_t *tree, lxb_html_tree_insertion_mode_f mode)
lxb_inline void lxb_html_tree_attach_document(lxb_html_tree_t *tree, lxb_html_document_t *doc)
Definition tree.h:372
lxb_inline lxb_status_t lxb_html_tree_begin(lxb_html_tree_t *tree, lxb_html_document_t *document)
Definition tree.h:231
lxb_inline lxb_status_t lxb_html_tree_chunk(lxb_html_tree_t *tree, const lxb_char_t *html, size_t size)
Definition tree.h:239
lxb_inline lxb_status_t lxb_html_tree_end(lxb_html_tree_t *tree)
Definition tree.h:245
unsigned int lxb_status_t
Definition types.h:28
unsigned char lxb_char_t
Definition types.h:27