php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
initial.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
9
10
/*
 * One entry of the DOCTYPE identifier tables below: a string literal and its
 * length in bytes, not counting the terminating NUL.
 *
 * NOTE(review): the typedef name and the `static <type>` line in front of
 * every table were missing from the rendered listing; they are restored here
 * to match upstream lexbor. Confirm against the repository.
 */
typedef struct {
    const char *data;
    size_t     len;
}
lxb_html_tree_insertion_mode_initial_str_t;


/*
 * Public identifiers matched as a whole (equal length, then
 * lexbor_str_data_casecmp()). A match selects quirks mode.
 */
static lxb_html_tree_insertion_mode_initial_str_t
lxb_html_tree_insertion_mode_initial_doctype_public_is[] =
{
    {"-//W3O//DTD W3 HTML Strict 3.0//EN//", 36},
    {"-/W3C/DTD HTML 4.0 Transitional/EN", 34},
    {"HTML", 4}
};

/*
 * System identifiers matched as a whole (equal length, then
 * lexbor_str_data_casecmp()). A match selects quirks mode.
 */
static lxb_html_tree_insertion_mode_initial_str_t
lxb_html_tree_insertion_mode_initial_doctype_system_is[] =
{
    {"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd", 58}
};

/*
 * Public identifier prefixes (lexbor_str_data_ncasecmp() over `len` bytes).
 * A public identifier starting with any of them selects quirks mode.
 */
static lxb_html_tree_insertion_mode_initial_str_t
lxb_html_tree_insertion_mode_initial_doctype_public_start[] =
{
    {"+//Silmaril//dtd html Pro v0r11 19970101//", 42},
    {"-//AS//DTD HTML 3.0 asWedit + extensions//", 42},
    {"-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", 52},
    {"-//IETF//DTD HTML 2.0 Level 1//", 31},
    {"-//IETF//DTD HTML 2.0 Level 2//", 31},
    {"-//IETF//DTD HTML 2.0 Strict Level 1//", 38},
    {"-//IETF//DTD HTML 2.0 Strict Level 2//", 38},
    {"-//IETF//DTD HTML 2.0 Strict//", 30},
    {"-//IETF//DTD HTML 2.0//", 23},
    {"-//IETF//DTD HTML 2.1E//", 24},
    {"-//IETF//DTD HTML 3.0//", 23},
    {"-//IETF//DTD HTML 3.2 Final//", 29},
    {"-//IETF//DTD HTML 3.2//", 23},
    {"-//IETF//DTD HTML 3//", 21},
    {"-//IETF//DTD HTML Level 0//", 27},
    {"-//IETF//DTD HTML Level 1//", 27},
    {"-//IETF//DTD HTML Level 2//", 27},
    {"-//IETF//DTD HTML Level 3//", 27},
    {"-//IETF//DTD HTML Strict Level 0//", 34},
    {"-//IETF//DTD HTML Strict Level 1//", 34},
    {"-//IETF//DTD HTML Strict Level 2//", 34},
    {"-//IETF//DTD HTML Strict Level 3//", 34},
    {"-//IETF//DTD HTML Strict//", 26},
    {"-//IETF//DTD HTML//", 19},
    {"-//Metrius//DTD Metrius Presentational//", 40},
    {"-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", 53},
    {"-//Microsoft//DTD Internet Explorer 2.0 HTML//", 46},
    {"-//Microsoft//DTD Internet Explorer 2.0 Tables//", 48},
    {"-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", 53},
    {"-//Microsoft//DTD Internet Explorer 3.0 HTML//", 46},
    {"-//Microsoft//DTD Internet Explorer 3.0 Tables//", 48},
    {"-//Netscape Comm. Corp.//DTD HTML//", 35},
    {"-//Netscape Comm. Corp.//DTD Strict HTML//", 42},
    {"-//O'Reilly and Associates//DTD HTML 2.0//", 42},
    {"-//O'Reilly and Associates//DTD HTML Extended 1.0//", 51},
    {"-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", 59},
    {"-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", 43},
    {"-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", 78},
    {"-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", 69},
    {"-//Spyglass//DTD HTML 2.0 Extended//", 36},
    {"-//Sun Microsystems Corp.//DTD HotJava HTML//", 45},
    {"-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", 52},
    {"-//W3C//DTD HTML 3 1995-03-24//", 31},
    {"-//W3C//DTD HTML 3.2 Draft//", 28},
    {"-//W3C//DTD HTML 3.2 Final//", 28},
    {"-//W3C//DTD HTML 3.2//", 22},
    {"-//W3C//DTD HTML 3.2S Draft//", 29},
    {"-//W3C//DTD HTML 4.0 Frameset//", 31},
    {"-//W3C//DTD HTML 4.0 Transitional//", 35},
    {"-//W3C//DTD HTML Experimental 19960712//", 40},
    {"-//W3C//DTD HTML Experimental 970421//", 38},
    {"-//W3C//DTD W3 HTML//", 21},
    {"-//W3O//DTD W3 HTML 3.0//", 25},
    {"-//WebTechs//DTD Mozilla HTML 2.0//", 35},
    {"-//WebTechs//DTD Mozilla HTML//", 31}
};

/*
 * Public identifier prefixes whose effect depends on the system identifier:
 * quirks mode when the system identifier is empty, limited-quirks mode when
 * it is not.
 */
static lxb_html_tree_insertion_mode_initial_str_t
lxb_html_tree_insertion_mode_initial_doctype_sys_pub_start[] =
{
    {"-//W3C//DTD HTML 4.01 Frameset//", 32},
    {"-//W3C//DTD HTML 4.01 Transitional//", 36}
};

/* Public identifier prefixes that select limited-quirks mode. */
static lxb_html_tree_insertion_mode_initial_str_t
lxb_html_tree_insertion_mode_initial_doctype_lim_pub_start[] =
{
    {"-//W3C//DTD XHTML 1.0 Frameset//", 32},
    {"-//W3C//DTD XHTML 1.0 Transitional//", 36}
};
105
106
107static bool
108lxb_html_tree_insertion_mode_initial_doctype(lxb_html_tree_t *tree,
109 lxb_html_token_t *token);
110
111static void
112lxb_html_tree_insertion_mode_initial_doctype_ckeck(lxb_html_tree_t *tree,
113 lxb_dom_document_type_t *doc_type,
114 lxb_html_token_t *token, bool is_html);
115
116static bool
117lxb_html_tree_insertion_mode_initial_doctype_ckeck_public(
118 lxb_dom_document_type_t *doc_type);
119
120static bool
121lxb_html_tree_insertion_mode_initial_doctype_ckeck_system(
122 lxb_dom_document_type_t *doc_type);
123
124static bool
125lxb_html_tree_insertion_mode_initial_doctype_ckeck_pubsys(
126 lxb_dom_document_type_t *doc_type);
127
128static bool
129lxb_html_tree_insertion_mode_initial_doctype_check_limq(
130 lxb_dom_document_type_t *doc_type);
131
132
133bool
135 lxb_html_token_t *token)
136{
137 switch (token->tag_id) {
138 case LXB_TAG__EM_COMMENT: {
139 lxb_dom_comment_t *comment;
140
141 comment = lxb_html_tree_insert_comment(tree, token,
143 if (comment == NULL) {
144 return lxb_html_tree_process_abort(tree);
145 }
146
147 break;
148 }
149
152
153 return lxb_html_tree_insertion_mode_initial_doctype(tree, token);
154
155 case LXB_TAG__TEXT:
157 if (tree->status != LXB_STATUS_OK) {
158 return lxb_html_tree_process_abort(tree);
159 }
160
161 if (token->text_start == token->text_end) {
162 return true;
163 }
164 /* fall through */
165
166 default: {
167 lxb_dom_document_t *document = &tree->document->dom_document;
168
169 if (tree->document->iframe_srcdoc == NULL) {
170 lxb_html_tree_parse_error(tree, token,
172
174 }
175
177
178 return false;
179 }
180 }
181
182 return true;
183}
184
185static bool
186lxb_html_tree_insertion_mode_initial_doctype(lxb_html_tree_t *tree,
187 lxb_html_token_t *token)
188{
189 lxb_dom_document_type_t *doc_type;
190
191 /* Create */
192 doc_type = lxb_html_tree_create_document_type_from_token(tree, token);
193 if (doc_type == NULL) {
195
196 return lxb_html_tree_process_abort(tree);
197 }
198
199 /* Check */
200 bool is_html = (doc_type->name == LXB_DOM_ATTR_HTML);
201
202 if (is_html == false
203 || doc_type->public_id.length != 0
204 || (doc_type->system_id.length == 19
205 && strncmp("about:legacy-compat",
206 (const char *) doc_type->system_id.data, 19) != 0)
207 )
208 {
209 lxb_html_tree_parse_error(tree, token,
211 }
212
213 lxb_html_tree_insertion_mode_initial_doctype_ckeck(tree, doc_type,
214 token, is_html);
215
217 lxb_dom_interface_node(doc_type));
218
220
221 return true;
222}
223
224static void
225lxb_html_tree_insertion_mode_initial_doctype_ckeck(lxb_html_tree_t *tree,
226 lxb_dom_document_type_t *doc_type,
227 lxb_html_token_t *token, bool is_html)
228{
229 if (tree->document->iframe_srcdoc != NULL) {
230 return;
231 }
232
233 bool quirks;
234 lxb_dom_document_t *document = &tree->document->dom_document;
235
237 goto set_quirks;
238 }
239
240 if (is_html == false) {
241 goto set_quirks;
242 }
243
244 if (doc_type->public_id.length != 0) {
245 quirks =
246 lxb_html_tree_insertion_mode_initial_doctype_ckeck_public(doc_type);
247
248 if (quirks) {
249 goto set_quirks;
250 }
251 }
252
253 if (doc_type->system_id.length != 0) {
254 quirks =
255 lxb_html_tree_insertion_mode_initial_doctype_ckeck_system(doc_type);
256
257 if (quirks) {
258 goto set_quirks;
259 }
260 }
261
262 if (doc_type->public_id.length != 0 && doc_type->system_id.length == 0) {
263 quirks =
264 lxb_html_tree_insertion_mode_initial_doctype_ckeck_pubsys(doc_type);
265
266 if (quirks) {
267 goto set_quirks;
268 }
269 }
270
271 if (doc_type->public_id.length != 0) {
272 quirks =
273 lxb_html_tree_insertion_mode_initial_doctype_check_limq(doc_type);
274
275 if (quirks) {
277 return;
278 }
279 }
280
281 return;
282
283set_quirks:
284
286}
287
288static bool
289lxb_html_tree_insertion_mode_initial_doctype_ckeck_public(
290 lxb_dom_document_type_t *doc_type)
291{
292 size_t size, i;
294
295 /* The public identifier is set to */
296 size = sizeof(lxb_html_tree_insertion_mode_initial_doctype_public_is)
298
299 for (i = 0; i < size; i++) {
300 str = &lxb_html_tree_insertion_mode_initial_doctype_public_is[i];
301
302 if (str->len == doc_type->public_id.length
303 && lexbor_str_data_casecmp((const lxb_char_t *) str->data,
304 doc_type->public_id.data))
305 {
306 return true;
307 }
308 }
309
310 /* The public identifier starts with */
311 size = sizeof(lxb_html_tree_insertion_mode_initial_doctype_public_start)
313
314 for (i = 0; i < size; i++) {
315 str = &lxb_html_tree_insertion_mode_initial_doctype_public_start[i];
316
317 if (str->len <= doc_type->public_id.length
318 && lexbor_str_data_ncasecmp((const lxb_char_t *) str->data,
319 doc_type->public_id.data, str->len))
320 {
321 return true;
322 }
323 }
324
325 return false;
326}
327
328static bool
329lxb_html_tree_insertion_mode_initial_doctype_ckeck_system(
330 lxb_dom_document_type_t *doc_type)
331{
332 size_t size;
334
335 /* The system identifier is set to */
336 size = sizeof(lxb_html_tree_insertion_mode_initial_doctype_system_is)
338
339 for (size_t i = 0; i < size; i++) {
340 str = &lxb_html_tree_insertion_mode_initial_doctype_system_is[i];
341
342 if (str->len == doc_type->system_id.length
343 && lexbor_str_data_casecmp((const lxb_char_t *) str->data,
344 doc_type->system_id.data))
345 {
346 return true;
347 }
348 }
349
350 return false;
351}
352
353static bool
354lxb_html_tree_insertion_mode_initial_doctype_ckeck_pubsys(
355 lxb_dom_document_type_t *doc_type)
356{
357 size_t size;
359
360 /* The system identifier is missing and the public identifier starts with */
361 size = sizeof(lxb_html_tree_insertion_mode_initial_doctype_sys_pub_start)
363
364 for (size_t i = 0; i < size; i++) {
365 str = &lxb_html_tree_insertion_mode_initial_doctype_sys_pub_start[i];
366
367 if (str->len <= doc_type->public_id.length
368 && lexbor_str_data_ncasecmp((const lxb_char_t *) str->data,
369 doc_type->public_id.data, str->len))
370 {
371 return true;
372 }
373 }
374
375 return false;
376}
377
378static bool
379lxb_html_tree_insertion_mode_initial_doctype_check_limq(
380 lxb_dom_document_type_t *doc_type)
381{
382 bool quirks;
383 size_t size;
385
386 if (doc_type->system_id.length != 0) {
387 quirks =
388 lxb_html_tree_insertion_mode_initial_doctype_ckeck_pubsys(doc_type);
389
390 if (quirks) {
391 return true;
392 }
393 }
394
395 /* The public identifier starts with */
396 size = sizeof(lxb_html_tree_insertion_mode_initial_doctype_lim_pub_start)
398
399 for (size_t i = 0; i < size; i++) {
400 str = &lxb_html_tree_insertion_mode_initial_doctype_lim_pub_start[i];
401
402 if (str->len <= doc_type->public_id.length
403 && lexbor_str_data_ncasecmp((const lxb_char_t *) str->data,
404 doc_type->public_id.data, str->len))
405 {
406 return true;
407 }
408 }
409
410 return false;
411}
@ LXB_DOM_ATTR_HTML
Definition attr_const.h:37
@ LXB_STATUS_ERROR_MEMORY_ALLOCATION
Definition base.h:51
@ LXB_STATUS_OK
Definition base.h:49
struct lxb_dom_document lxb_dom_document_t
Definition interface.h:41
struct lxb_dom_document_type lxb_dom_document_type_t
Definition interface.h:42
#define lxb_dom_interface_node(obj)
Definition interface.h:31
struct lxb_dom_comment lxb_dom_comment_t
Definition interface.h:49
@ LXB_DOM_DOCUMENT_CMODE_QUIRKS
Definition document.h:23
@ LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS
Definition document.h:24
new_type size
Definition ffi.c:4365
#define NULL
Definition gdcache.h:45
struct lxb_html_tree lxb_html_tree_t
Definition base.h:28
lxb_status_t lxb_html_token_data_skip_ws_begin(lxb_html_token_t *token)
Definition token.c:179
@ LXB_HTML_TOKEN_TYPE_FORCE_QUIRKS
Definition token.h:29
@ LXB_HTML_RULES_ERROR_BADOTOININMO
Definition error.h:32
@ LXB_HTML_RULES_ERROR_UNTOININMO
Definition error.h:30
bool lxb_html_tree_insertion_mode_initial(lxb_html_tree_t *tree, lxb_html_token_t *token)
Definition initial.c:134
LXB_API bool lxb_html_tree_insertion_mode_before_html(lxb_html_tree_t *tree, lxb_html_token_t *token)
Definition before_html.c:29
void lxb_dom_document_attach_doctype(lxb_dom_document_t *document, lxb_dom_document_type_t *doctype)
Definition document.c:224
void lxb_dom_node_insert_child_wo_events(lxb_dom_node_t *to, lxb_dom_node_t *node)
Definition node.c:368
bool lxb_html_tree_process_abort(lxb_html_tree_t *tree)
Definition tree.c:224
lxb_dom_comment_t * lxb_html_tree_insert_comment(lxb_html_tree_t *tree, lxb_html_token_t *token, lxb_dom_node_t *pos)
Definition tree.c:790
void lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token, lxb_html_tree_error_id_t id)
Definition tree.c:237
lxb_dom_document_type_t * lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree, lxb_html_token_t *token)
Definition tree.c:828
bool lexbor_str_data_ncasecmp(const lxb_char_t *first, const lxb_char_t *sec, size_t size)
Definition str.c:435
bool lexbor_str_data_casecmp(const lxb_char_t *first, const lxb_char_t *sec)
Definition str.c:476
lxb_char_t * data
Definition str.h:47
size_t length
Definition str.h:48
lxb_dom_attr_id_t name
lxb_dom_node_t node
Definition document.h:36
lxb_dom_document_cmode_t compat_mode
Definition document.h:38
lxb_dom_document_t dom_document
Definition document.h:58
void * iframe_srcdoc
Definition document.h:60
lxb_html_token_type_t type
Definition token.h:49
const lxb_char_t * text_end
Definition token.h:40
lxb_tag_id_t tag_id
Definition token.h:48
const lxb_char_t * text_start
Definition token.h:39
lxb_status_t status
Definition tree.h:66
lxb_html_tree_insertion_mode_f mode
Definition tree.h:62
lxb_html_document_t * document
Definition tree.h:42
@ LXB_TAG__EM_DOCTYPE
Definition const.h:29
@ LXB_TAG__EM_COMMENT
Definition const.h:28
@ LXB_TAG__TEXT
Definition const.h:26
unsigned char lxb_char_t
Definition types.h:27
strncmp(string $string1, string $string2, int $length)