php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
foreign_content.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
10
11#define LEXBOR_TOKENIZER_CHARS_MAP
12#define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
13#include "lexbor/core/str_res.h"
14
15
18 const lxb_char_t *prefix, size_t prefix_len,
19 const lxb_char_t *lname, size_t lname_len);
20
21
22lxb_inline bool
24 lxb_html_token_t *token)
25{
26 if (tree->open_elements->length == 0) {
27 return tree->mode(tree, token);
28 }
29
31
32 size_t idx = tree->open_elements->length - 1;
33
34 if (idx > 0 && list[idx]->local_name != token->tag_id) {
35 lxb_html_tree_parse_error(tree, token,
37 }
38
39 while (idx != 0) {
40 if (list[idx]->local_name == token->tag_id) {
41 lxb_html_tree_open_elements_pop_until_node(tree, list[idx], true);
42
43 return true;
44 }
45
46 idx--;
47
48 if (list[idx]->ns == LXB_NS_HTML) {
49 break;
50 }
51 }
52
53 return tree->mode(tree, token);
54}
55
56/*
57 * TODO: Need to process script
58 */
59lxb_inline bool
74
75lxb_inline bool
77 lxb_html_token_t *token)
78{
79 lxb_html_element_t *element;
80 const lxb_html_tag_fixname_t *fixname_svg;
82
83 if (node->ns == LXB_NS_MATH) {
85 }
86 else if (node->ns == LXB_NS_SVG) {
88 }
89
90 element = lxb_html_tree_insert_foreign_element(tree, token, node->ns);
91 if (element == NULL) {
94
95 return lxb_html_tree_process_abort(tree);
96 }
97
98 if (node->ns == LXB_NS_SVG) {
99 fixname_svg = lxb_html_tag_fixname_svg(element->element.node.local_name);
100 if (fixname_svg != NULL && fixname_svg->name != NULL) {
102 fixname_svg->name,
103 (size_t) fixname_svg->len);
104 }
105 }
106
107 tree->before_append_attr = NULL;
108
109 if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE_SELF) == 0) {
110 return true;
111 }
112
113 node = lxb_html_tree_current_node(tree);
114
115 if (token->tag_id == LXB_TAG_SCRIPT && node->ns == LXB_NS_SVG) {
117 }
118 else {
120 }
121
122 return true;
123}
124
125lxb_inline bool
127 lxb_html_token_t *token)
128{
129 lexbor_str_t str;
130
131 if (token->null_count != 0) {
133
135 tree->document->dom_document.text);
136 }
137 else {
138 tree->status = lxb_html_token_make_text(token, &str,
139 tree->document->dom_document.text);
140 }
141
142 if (tree->status != LXB_STATUS_OK) {
143 return lxb_html_tree_process_abort(tree);
144 }
145
146 /* The length can be zero only if all NULL characters are gone */
147 if (str.length == 0) {
148 lexbor_str_destroy(&str, tree->document->dom_document.text, false);
149
150 return true;
151 }
152
153 if (tree->frameset_ok) {
154 const lxb_char_t *pos = str.data;
155 const lxb_char_t *end = str.data + str.length;
156
157 static const lxb_char_t *rep = lexbor_str_res_ansi_replacement_character;
158 static const unsigned rep_len = sizeof(lexbor_str_res_ansi_replacement_character) - 1;
159
160 while (pos != end) {
161 /* Need to skip the U+FFFD REPLACEMENT CHARACTER */
162 if (*pos == *rep) {
163 if ((end - pos) < rep_len) {
164 tree->frameset_ok = false;
165
166 break;
167 }
168
169 if (memcmp(pos, rep, sizeof(lxb_char_t) * rep_len) != 0) {
170 tree->frameset_ok = false;
171
172 break;
173 }
174
175 pos = pos + rep_len;
176
177 continue;
178 }
179
180 if (lexbor_tokenizer_chars_map[*pos]
182 {
183 tree->frameset_ok = false;
184
185 break;
186 }
187
188 pos++;
189 }
190 }
191
193 if (tree->status != LXB_STATUS_OK) {
194 return lxb_html_tree_process_abort(tree);
195 }
196
197 return true;
198}
199
200lxb_inline bool
202 lxb_html_token_t *token)
203{
204 lxb_dom_comment_t *comment;
205
206 comment = lxb_html_tree_insert_comment(tree, token, NULL);
207 if (comment == NULL) {
209
210 return lxb_html_tree_process_abort(tree);
211 }
212
213 return true;
214}
215
216lxb_inline bool
224
225/*
226 * "b", "big", "blockquote", "body", "br", "center", "code", "dd", "div", "dl",
227 * "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i",
228 * "img", "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre", "ruby",
229 * "s", "small", "span", "strong", "strike", "sub", "sup", "table", "tt", "u",
230 * "ul", "var",
231 * or "font", if the token has any attributes named "color", "face", or "size"
232 */
233lxb_inline bool
235 lxb_html_token_t *token)
236{
237 lxb_dom_node_t *node;
238
239 if (token->tag_id == LXB_TAG_FONT) {
241
242 while (attr != NULL) {
243 if (attr->name != NULL
244 && (attr->name->attr_id == LXB_DOM_ATTR_COLOR
245 || attr->name->attr_id == LXB_DOM_ATTR_FACE
246 || attr->name->attr_id == LXB_DOM_ATTR_SIZE))
247 {
248 goto go_next;
249 }
250
251 attr = attr->next;
252 }
253
255 token);
256 }
257
258go_next:
259
261
262 if (tree->fragment != NULL) {
264 token);
265 }
266
267 do {
269
270 node = lxb_html_tree_current_node(tree);
271 }
272 while (node &&
275 || node->ns == LXB_NS_HTML));
276
277 return false;
278}
279
280bool
282 lxb_html_token_t *token)
283{
284 if (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) {
285 switch (token->tag_id) {
286 case LXB_TAG_SCRIPT:
288 token);
289 default:
291 token);
292 }
293 }
294
295 switch (token->tag_id) {
296 case LXB_TAG__TEXT:
298 token);
301 token);
304 token);
305
306 case LXB_TAG_B:
307 case LXB_TAG_BIG:
309 case LXB_TAG_BODY:
310 case LXB_TAG_BR:
311 case LXB_TAG_CENTER:
312 case LXB_TAG_CODE:
313 case LXB_TAG_DD:
314 case LXB_TAG_DIV:
315 case LXB_TAG_DL:
316 case LXB_TAG_DT:
317 case LXB_TAG_EM:
318 case LXB_TAG_EMBED:
319 case LXB_TAG_H1:
320 case LXB_TAG_H2:
321 case LXB_TAG_H3:
322 case LXB_TAG_H4:
323 case LXB_TAG_H5:
324 case LXB_TAG_H6:
325 case LXB_TAG_HEAD:
326 case LXB_TAG_HR:
327 case LXB_TAG_I:
328 case LXB_TAG_IMG:
329 case LXB_TAG_LI:
330 case LXB_TAG_LISTING:
331 case LXB_TAG_MENU:
332 case LXB_TAG_META:
333 case LXB_TAG_NOBR:
334 case LXB_TAG_OL:
335 case LXB_TAG_P:
336 case LXB_TAG_PRE:
337 case LXB_TAG_RUBY:
338 case LXB_TAG_S:
339 case LXB_TAG_SMALL:
340 case LXB_TAG_SPAN:
341 case LXB_TAG_STRONG:
342 case LXB_TAG_STRIKE:
343 case LXB_TAG_SUB:
344 case LXB_TAG_TABLE:
345 case LXB_TAG_TT:
346 case LXB_TAG_U:
347 case LXB_TAG_UL:
348 case LXB_TAG_VAR:
349 case LXB_TAG_FONT:
351 token);
352 default:
354 token);
355 }
356}
@ LXB_DOM_ATTR_SIZE
Definition attr_const.h:49
@ LXB_DOM_ATTR_FACE
Definition attr_const.h:31
@ LXB_DOM_ATTR_COLOR
Definition attr_const.h:27
@ LXB_STATUS_ERROR_MEMORY_ALLOCATION
Definition base.h:51
@ LXB_STATUS_OK
Definition base.h:49
struct lxb_dom_comment lxb_dom_comment_t
Definition interface.h:49
struct lxb_dom_node lxb_dom_node_t
Definition interface.h:38
struct lxb_dom_element lxb_dom_element_t
Definition interface.h:39
new_type attr
Definition ffi.c:4364
lxb_inline bool lxb_html_tree_insertion_mode_foreign_content_text(lxb_html_tree_t *tree, lxb_html_token_t *token)
lxb_inline bool lxb_html_tree_insertion_mode_foreign_content_anything_else(lxb_html_tree_t *tree, lxb_html_token_t *token)
lxb_inline bool lxb_html_tree_insertion_mode_foreign_content_all(lxb_html_tree_t *tree, lxb_html_token_t *token)
lxb_status_t lxb_dom_element_qualified_name_set(lxb_dom_element_t *element, const lxb_char_t *prefix, size_t prefix_len, const lxb_char_t *lname, size_t lname_len)
Definition element.c:119
bool lxb_html_tree_insertion_mode_foreign_content(lxb_html_tree_t *tree, lxb_html_token_t *token)
lxb_inline bool lxb_html_tree_insertion_mode_foreign_content_script_closed(lxb_html_tree_t *tree, lxb_html_token_t *token)
lxb_inline bool lxb_html_tree_insertion_mode_foreign_content_comment(lxb_html_tree_t *tree, lxb_html_token_t *token)
lxb_inline bool lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(lxb_html_tree_t *tree, lxb_html_token_t *token)
lxb_inline bool lxb_html_tree_insertion_mode_foreign_content_doctype(lxb_html_tree_t *tree, lxb_html_token_t *token)
#define NULL
Definition gdcache.h:45
#define prefix
struct lxb_html_tree lxb_html_tree_t
Definition base.h:28
struct lxb_html_element lxb_html_element_t
Definition interface.h:111
lxb_inline const lxb_html_tag_fixname_t * lxb_html_tag_fixname_svg(lxb_tag_id_t tag_id)
Definition tag.h:62
lxb_status_t lxb_html_token_make_text(lxb_html_token_t *token, lexbor_str_t *str, lexbor_mraw_t *mraw)
Definition token.c:91
lxb_status_t lxb_html_token_make_text_replace_null(lxb_html_token_t *token, lexbor_str_t *str, lexbor_mraw_t *mraw)
Definition token.c:141
@ LXB_HTML_TOKEN_TYPE_CLOSE
Definition token.h:27
@ LXB_HTML_TOKEN_TYPE_CLOSE_SELF
Definition token.h:28
@ LXB_HTML_RULES_ERROR_NUCH
Definition error.h:26
@ LXB_HTML_RULES_ERROR_UNELINOPELST
Definition error.h:66
@ LXB_HTML_RULES_ERROR_UNTO
Definition error.h:22
@ LXB_HTML_RULES_ERROR_DOTOFOCOMO
Definition error.h:92
lxb_status_t lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree, lxb_dom_attr_t *attr, void *ctx)
Definition tree.c:1706
lxb_html_element_t * lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree, lxb_html_token_t *token, lxb_ns_id_t ns)
Definition tree.c:392
bool lxb_html_tree_process_abort(lxb_html_tree_t *tree)
Definition tree.c:224
bool lxb_html_tree_html_integration_point(lxb_dom_node_t *node)
Definition tree.c:1664
lxb_dom_comment_t * lxb_html_tree_insert_comment(lxb_html_tree_t *tree, lxb_html_token_t *token, lxb_dom_node_t *pos)
Definition tree.c:790
lxb_status_t lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree, lexbor_str_t *str, lxb_dom_node_t **ret_node)
Definition tree.c:697
lxb_status_t lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree, lxb_dom_attr_t *attr, void *ctx)
Definition tree.c:1720
void lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token, lxb_html_tree_error_id_t id)
Definition tree.c:237
@ LXB_NS_MATH
Definition const.h:27
@ LXB_NS_SVG
Definition const.h:28
@ LXB_NS_HTML
Definition const.h:26
void lxb_html_tree_open_elements_pop_until_node(lxb_html_tree_t *tree, lxb_dom_node_t *node, bool exclude)
lxb_inline lxb_dom_node_t * lxb_html_tree_open_elements_pop(lxb_html_tree_t *tree)
unsigned const char * end
Definition php_ffi.h:51
unsigned const char * pos
Definition php_ffi.h:52
lexbor_str_t * lexbor_str_destroy(lexbor_str_t *str, lexbor_mraw_t *mraw, bool destroy_obj)
Definition str.c:76
#define LEXBOR_STR_RES_MAP_CHAR_WHITESPACE
Definition str_res.h:12
size_t length
Definition array.h:20
void ** list
Definition array.h:18
lxb_char_t * data
Definition str.h:47
size_t length
Definition str.h:48
lexbor_mraw_t * text
Definition document.h:54
lxb_dom_node_t node
Definition element.h:33
uintptr_t ns
Definition node.h:48
uintptr_t local_name
Definition node.h:46
lxb_dom_document_t dom_document
Definition document.h:58
lxb_dom_element_t element
Definition element.h:23
const lxb_char_t * name
Definition tag.h:36
unsigned int len
Definition tag.h:37
lxb_html_token_type_t type
Definition token.h:49
size_t null_count
Definition token.h:47
lxb_tag_id_t tag_id
Definition token.h:48
lxb_html_token_attr_t * attr_first
Definition token.h:42
lxb_html_tree_append_attr_f before_append_attr
Definition tree.h:64
bool frameset_ok
Definition tree.h:56
lxb_status_t status
Definition tree.h:66
lxb_html_tree_insertion_mode_f mode
Definition tree.h:62
lexbor_array_t * open_elements
Definition tree.h:47
lxb_html_document_t * document
Definition tree.h:42
lxb_dom_node_t * fragment
Definition tree.h:43
@ LXB_TAG_OL
Definition const.h:165
@ LXB_TAG_DD
Definition const.h:68
@ LXB_TAG__EM_DOCTYPE
Definition const.h:29
@ LXB_TAG_CODE
Definition const.h:63
@ LXB_TAG_VAR
Definition const.h:216
@ LXB_TAG_HR
Definition const.h:124
@ LXB_TAG_SUB
Definition const.h:196
@ LXB_TAG_H4
Definition const.h:118
@ LXB_TAG_PRE
Definition const.h:174
@ LXB_TAG_UL
Definition const.h:215
@ LXB_TAG_H5
Definition const.h:119
@ LXB_TAG_STRIKE
Definition const.h:193
@ LXB_TAG_H1
Definition const.h:115
@ LXB_TAG_HEAD
Definition const.h:121
@ LXB_TAG_LI
Definition const.h:137
@ LXB_TAG_BLOCKQUOTE
Definition const.h:54
@ LXB_TAG_EM
Definition const.h:78
@ LXB_TAG__EM_COMMENT
Definition const.h:28
@ LXB_TAG_SMALL
Definition const.h:189
@ LXB_TAG_STRONG
Definition const.h:194
@ LXB_TAG_BODY
Definition const.h:55
@ LXB_TAG_META
Definition const.h:148
@ LXB_TAG_B
Definition const.h:46
@ LXB_TAG_CENTER
Definition const.h:60
@ LXB_TAG_I
Definition const.h:126
@ LXB_TAG_U
Definition const.h:214
@ LXB_TAG_DL
Definition const.h:76
@ LXB_TAG_H2
Definition const.h:116
@ LXB_TAG_IMG
Definition const.h:129
@ LXB_TAG_TABLE
Definition const.h:200
@ LXB_TAG_NOBR
Definition const.h:160
@ LXB_TAG_H6
Definition const.h:120
@ LXB_TAG_SCRIPT
Definition const.h:185
@ LXB_TAG_TT
Definition const.h:213
@ LXB_TAG_DT
Definition const.h:77
@ LXB_TAG_P
Definition const.h:169
@ LXB_TAG_LISTING
Definition const.h:140
@ LXB_TAG_BIG
Definition const.h:52
@ LXB_TAG_FONT
Definition const.h:108
@ LXB_TAG_DIV
Definition const.h:75
@ LXB_TAG_BR
Definition const.h:56
@ LXB_TAG_EMBED
Definition const.h:79
@ LXB_TAG_RUBY
Definition const.h:182
@ LXB_TAG_SPAN
Definition const.h:192
@ LXB_TAG_MENU
Definition const.h:147
@ LXB_TAG_S
Definition const.h:183
@ LXB_TAG__TEXT
Definition const.h:26
@ LXB_TAG_H3
Definition const.h:117
struct lxb_html_token_attr lxb_html_token_attr_t
Definition token_attr.h:22
lxb_inline bool lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t *node)
Definition tree.h:343
lxb_inline lxb_dom_node_t * lxb_html_tree_current_node(lxb_html_tree_t *tree)
Definition tree.h:286
lxb_inline lxb_dom_node_t * lxb_html_tree_adjusted_current_node(lxb_html_tree_t *tree)
Definition tree.h:297
unsigned int lxb_status_t
Definition types.h:28
#define lxb_inline
Definition types.h:21
unsigned char lxb_char_t
Definition types.h:27