php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
token.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7#include "lexbor/html/token.h"
9
10#define LEXBOR_STR_RES_MAP_LOWERCASE
11#define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
12#define LEXBOR_STR_RES_MAP_HEX
13#define LEXBOR_STR_RES_MAP_NUM
14#include "lexbor/core/str_res.h"
15
17
18
19const lxb_tag_data_t *
21 const lxb_char_t *name, size_t length);
22
23
29
32{
33 return lexbor_dobject_free(dobj, token);
34}
35
38{
40 if (attr == NULL) {
41 return NULL;
42 }
43
44 if (token->attr_last == NULL) {
45 token->attr_first = attr;
46 token->attr_last = attr;
47
48 return attr;
49 }
50
51 token->attr_last->next = attr;
52 attr->prev = token->attr_last;
53
54 token->attr_last = attr;
55
56 return attr;
57}
58
59void
61{
62 if (token->attr_first == attr) {
63 token->attr_first = attr->next;
64 }
65
66 if (token->attr_last == attr) {
67 token->attr_last = attr->prev;
68 }
69
70 if (attr->next != NULL) {
71 attr->next->prev = attr->prev;
72 }
73
74 if (attr->prev != NULL) {
75 attr->prev->next = attr->next;
76 }
77
78 attr->next = NULL;
79 attr->prev = NULL;
80}
81
82void
89
92 lexbor_mraw_t *mraw)
93{
94 size_t len = token->text_end - token->text_start;
95
96 (void) lexbor_str_init(str, mraw, len);
97 if (str->data == NULL) {
99 }
100
101 memcpy(str->data, token->text_start, len);
102
103 str->data[len] = 0x00;
104 str->length = len;
105
106 return LXB_STATUS_OK;
107}
108
111 lexbor_mraw_t *mraw)
112{
113 lxb_char_t *p, c;
114 const lxb_char_t *data = token->text_start;
115 const lxb_char_t *end = token->text_end;
116
117 size_t len = (end - data) - token->null_count;
118
119 (void) lexbor_str_init(str, mraw, len);
120 if (str->data == NULL) {
122 }
123
124 p = str->data;
125
126 while (data < end) {
127 c = *data++;
128
129 if (c != 0x00) {
130 *p++ = c;
131 }
132 }
133
134 str->data[len] = 0x00;
135 str->length = len;
136
137 return LXB_STATUS_OK;
138}
139
142 lexbor_str_t *str, lexbor_mraw_t *mraw)
143{
144 lxb_char_t *p, c;
145 const lxb_char_t *data = token->text_start;
146 const lxb_char_t *end = token->text_end;
147
148 static const unsigned rep_len = sizeof(lexbor_str_res_ansi_replacement_character) - 1;
149
150 size_t len = (end - data) + (token->null_count * rep_len) - token->null_count;
151
152 (void) lexbor_str_init(str, mraw, len);
153 if (str->data == NULL) {
155 }
156
157 p = str->data;
158
159 while (data < end) {
160 c = *data++;
161
162 if (c == 0x00) {
163 memcpy(p, lexbor_str_res_ansi_replacement_character, rep_len);
164 p += rep_len;
165
166 continue;
167 }
168
169 *p++ = c;
170 }
171
172 str->data[len] = 0x00;
173 str->length = len;
174
175 return LXB_STATUS_OK;
176}
177
180{
181 const lxb_char_t *data = token->text_start;
182 const lxb_char_t *end = token->text_end;
183
184 while (data < end) {
185 switch (*data) {
186 /*
187 * U+0009 CHARACTER TABULATION (tab)
188 * U+000A LINE FEED (LF)
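189 * U+000D CARRIAGE RETURN (CR) -- NOTE(review): this line previously named U+000C FORM FEED (FF), but the case label below tests 0x0D; confirm which one is intended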
190 * U+0020 SPACE
191 */
192 case 0x09:
193 case 0x0A:
194 case 0x0D:
195 case 0x20:
196 break;
197
198 default:
199 token->begin += data - token->text_start;
200 token->text_start = data;
201
202 return LXB_STATUS_OK;
203 }
204
205 data++;
206 }
207
208 token->begin += data - token->text_start;
209 token->text_start = data;
210
211 return LXB_STATUS_OK;
212}
213
216{
217 const lxb_char_t *data = token->text_start;
218 const lxb_char_t *end = token->text_end;
219
220 if (data < end) {
221 /* U+000A LINE FEED (LF) */
222 if (*data == 0x0A) {
223 token->begin++;
224 token->text_start++;
225 }
226 }
227
228 return LXB_STATUS_OK;
229}
230
233 lxb_html_token_t *ws_token)
234{
235 *ws_token = *token;
236
238 if (status != LXB_STATUS_OK) {
239 return status;
240 }
241
242 if (token->text_start == token->text_end) {
243 return LXB_STATUS_OK;
244 }
245
246 if (token->text_start == ws_token->text_start) {
247 memset(ws_token, 0, sizeof(lxb_html_token_t));
248
249 return LXB_STATUS_OK;
250 }
251
252 ws_token->end = token->begin;
253 ws_token->text_end = token->text_start;
254
255 return LXB_STATUS_OK;
256}
257
260 lxb_dom_document_type_t *doc_type)
261{
263 lexbor_mraw_t *mraw = doc_type->node.owner_document->mraw;
264
265 /* If the token has no attributes, leave the name undefined and set the public and system IDs to empty strings */
266 if (token->attr_first == NULL) {
267 goto set_name_pub_sys_empty;
268 }
269
270 /* Name */
271 attr = token->attr_first;
272
273 doc_type->name = attr->name->attr_id;
274
275 /* PUBLIC or SYSTEM */
276 attr = attr->next;
277 if (attr == NULL) {
278 goto set_pub_sys_empty;
279 }
280
281 if (attr->name->attr_id == LXB_DOM_ATTR_PUBLIC) {
282 (void) lexbor_str_init(&doc_type->public_id, mraw, attr->value_size);
283 if (doc_type->public_id.data == NULL) {
285 }
286
287 if (attr->value_begin == NULL) {
288 return LXB_STATUS_OK;
289 }
290
291 (void) lexbor_str_append(&doc_type->public_id, mraw, attr->value,
292 attr->value_size);
293 }
294 else if (attr->name->attr_id == LXB_DOM_ATTR_SYSTEM) {
295 (void) lexbor_str_init(&doc_type->system_id, mraw, attr->value_size);
296 if (doc_type->system_id.data == NULL) {
298 }
299
300 if (attr->value_begin == NULL) {
301 return LXB_STATUS_OK;
302 }
303
304 (void) lexbor_str_append(&doc_type->system_id, mraw, attr->value,
305 attr->value_size);
306
307 return LXB_STATUS_OK;
308 }
309 else {
310 goto set_pub_sys_empty;
311 }
312
313 /* SYSTEM */
314 attr = attr->next;
315 if (attr == NULL) {
316 goto set_sys_empty;
317 }
318
319 (void) lexbor_str_init(&doc_type->system_id, mraw, attr->value_size);
320 if (doc_type->system_id.data == NULL) {
322 }
323
324 (void) lexbor_str_append(&doc_type->system_id, mraw, attr->value,
325 attr->value_size);
326
327 return LXB_STATUS_OK;
328
329set_name_pub_sys_empty:
330
331 doc_type->name = LXB_DOM_ATTR__UNDEF;
332
333set_pub_sys_empty:
334
335 (void) lexbor_str_init(&doc_type->public_id, mraw, 0);
336 if (doc_type->public_id.data == NULL) {
338 }
339
340set_sys_empty:
341
342 (void) lexbor_str_init(&doc_type->system_id, mraw, 0);
343 if (doc_type->system_id.data == NULL) {
345 }
346
347 return LXB_HTML_STATUS_OK;
348}
349
352 const lxb_char_t *name, size_t name_len)
353{
356
358 if (data == NULL) {
359 return NULL;
360 }
361
362 while (attr != NULL) {
363 if (attr->name->attr_id == data->attr_id) {
364 return attr;
365 }
366
367 attr = attr->next;
368 }
369
370 return NULL;
371}
372
373/*
374 * No inline functions for ABI.
375 */
376void
381
size_t len
Definition apprentice.c:174
@ LXB_DOM_ATTR__UNDEF
Definition attr_const.h:21
@ LXB_DOM_ATTR_SYSTEM
Definition attr_const.h:53
@ LXB_DOM_ATTR_PUBLIC
Definition attr_const.h:44
@ LXB_STATUS_ERROR_MEMORY_ALLOCATION
Definition base.h:51
@ LXB_STATUS_OK
Definition base.h:49
DNS_STATUS status
Definition dns_win32.c:49
void * lexbor_dobject_calloc(lexbor_dobject_t *dobject)
Definition dobject.c:123
void * lexbor_dobject_free(lexbor_dobject_t *dobject, void *data)
Definition dobject.c:135
struct lxb_dom_document_type lxb_dom_document_type_t
Definition interface.h:42
memcpy(ptr1, ptr2, size)
memset(ptr, 0, type->size)
new_type attr
Definition ffi.c:4364
#define NULL
Definition gdcache.h:45
struct lexbor_hash lexbor_hash_t
Definition hash.h:41
hash(string $algo, string $data, bool $binary=false, array $options=[])
Definition hash.stub.php:12
struct lxb_html_tokenizer lxb_html_tokenizer_t
Definition base.h:26
@ LXB_HTML_STATUS_OK
Definition base.h:34
void lxb_html_token_attr_remove(lxb_html_token_t *token, lxb_html_token_attr_t *attr)
Definition token.c:60
lxb_html_token_attr_t * lxb_html_token_find_attr(lxb_html_tokenizer_t *tkz, lxb_html_token_t *token, const lxb_char_t *name, size_t name_len)
Definition token.c:351
lxb_status_t lxb_html_token_data_skip_ws_begin(lxb_html_token_t *token)
Definition token.c:179
lxb_status_t lxb_html_token_make_text_drop_null(lxb_html_token_t *token, lexbor_str_t *str, lexbor_mraw_t *mraw)
Definition token.c:110
lxb_html_token_t * lxb_html_token_destroy(lxb_html_token_t *token, lexbor_dobject_t *dobj)
Definition token.c:31
void lxb_html_token_attr_delete(lxb_html_token_t *token, lxb_html_token_attr_t *attr, lexbor_dobject_t *dobj)
Definition token.c:83
lxb_status_t lxb_html_token_data_split_ws_begin(lxb_html_token_t *token, lxb_html_token_t *ws_token)
Definition token.c:232
lxb_html_token_t * lxb_html_token_create_eof_noi(lexbor_dobject_t *dobj)
Definition token.c:383
lxb_html_token_t * lxb_html_token_create(lexbor_dobject_t *dobj)
Definition token.c:25
lxb_status_t lxb_html_token_make_text(lxb_html_token_t *token, lexbor_str_t *str, lexbor_mraw_t *mraw)
Definition token.c:91
lxb_html_token_attr_t * lxb_html_token_attr_append(lxb_html_token_t *token, lexbor_dobject_t *dobj)
Definition token.c:37
lxb_status_t lxb_html_token_doctype_parse(lxb_html_token_t *token, lxb_dom_document_type_t *doc_type)
Definition token.c:259
void lxb_html_token_clean_noi(lxb_html_token_t *token)
Definition token.c:377
lxb_status_t lxb_html_token_make_text_replace_null(lxb_html_token_t *token, lexbor_str_t *str, lexbor_mraw_t *mraw)
Definition token.c:141
const lxb_tag_data_t * lxb_tag_append_lower(lexbor_hash_t *hash, const lxb_char_t *name, size_t length)
Definition tag.c:41
lxb_status_t lxb_html_token_data_skip_one_newline_begin(lxb_html_token_t *token)
Definition token.c:215
lxb_inline lxb_html_token_t * lxb_html_token_create_eof(lexbor_dobject_t *dobj)
Definition token.h:112
lxb_inline void lxb_html_token_clean(lxb_html_token_t *token)
Definition token.h:106
const lxb_dom_attr_data_t * lxb_dom_attr_data_by_local_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t length)
Definition attr.c:425
unsigned const char * end
Definition php_ffi.h:51
zend_constant * data
p
Definition session.c:1105
lxb_char_t * lexbor_str_append(lexbor_str_t *str, lexbor_mraw_t *mraw, const lxb_char_t *buff, size_t length)
Definition str.c:131
lxb_char_t * lexbor_str_init(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t size)
Definition str.c:22
lxb_char_t * data
Definition str.h:47
size_t length
Definition str.h:48
lxb_dom_node_t node
lxb_dom_attr_id_t name
lexbor_mraw_t * mraw
Definition document.h:53
lxb_dom_document_t * owner_document
Definition node.h:50
lxb_html_token_attr_t * next
Definition token_attr.h:42
const lxb_char_t * end
Definition token.h:35
size_t null_count
Definition token.h:47
const lxb_char_t * text_end
Definition token.h:40
const lxb_char_t * text_start
Definition token.h:39
lxb_html_token_attr_t * attr_first
Definition token.h:42
lxb_html_token_attr_t * attr_last
Definition token.h:43
const lxb_char_t * begin
Definition token.h:34
lexbor_hash_t * attrs
Definition tokenizer.h:42
lxb_html_token_attr_t * lxb_html_token_attr_destroy(lxb_html_token_attr_t *attr, lexbor_dobject_t *dobj)
Definition token_attr.c:23
lxb_html_token_attr_t * lxb_html_token_attr_create(lexbor_dobject_t *dobj)
Definition token_attr.c:11
struct lxb_html_token_attr lxb_html_token_attr_t
Definition token_attr.h:22
unsigned int lxb_status_t
Definition types.h:28
unsigned char lxb_char_t
Definition types.h:27
ZEND_API void(ZEND_FASTCALL *zend_touch_vm_stack_data)(void *vm_stack_data)
zend_string * name