php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
tokenizer.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
11
12#include "lexbor/core/array.h"
13
14#define LEXBOR_STR_RES_MAP_LOWERCASE
15#include "lexbor/core/str_res.h"
16
17
/*
 * Canonical lowercase spelling of the CSS "!important" keyword. The
 * lookahead helpers below compare raw input against it through
 * lexbor_str_res_map_lowercase, and use sizeof(...) - 1 as its length.
 */
18static const lxb_char_t lxb_css_syntax_tokenizer_important[] = "important";
19
21lxb_css_syntax_tokenizer_cache_create(void);
22
23static lxb_status_t
24lxb_css_syntax_tokenizer_cache_init(lxb_css_syntax_tokenizer_cache_t *cache,
25 size_t size);
26
27static void
28lxb_css_syntax_tokenizer_cache_clean(lxb_css_syntax_tokenizer_cache_t *cache);
29
31lxb_css_syntax_tokenizer_cache_destroy(lxb_css_syntax_tokenizer_cache_t *cache);
32
36
37static lxb_status_t
38lxb_css_syntax_tokenizer_blank(lxb_css_syntax_tokenizer_t *tkz,
39 const lxb_char_t **data, const lxb_char_t **end,
40 void *ctx);
41
42static bool
43lxb_css_syntax_tokenizer_lookup_important_ch(lxb_css_syntax_tokenizer_t *tkz,
44 const lxb_char_t *p,
45 const lxb_char_t *end,
46 const lxb_char_t stop_ch,
48 bool skip_first);
49
50static bool
51lxb_css_syntax_tokenizer_lookup_important_end(lxb_css_syntax_tokenizer_t *tkz,
52 const lxb_char_t *p,
53 const lxb_char_t *end,
54 const lxb_char_t stop_ch,
56 bool skip_first);
57
58static bool
59lxb_css_syntax_tokenizer_lookup_important_tokens(lxb_css_syntax_tokenizer_t *tkz,
61 bool skip_first);
62
63
69
/*
 * lxb_css_syntax_tokenizer_init (per the cross-reference index, tokenizer.c:71).
 * Sets up a fresh tokenizer: a token-pointer cache of 128 slots, a 4096-byte
 * mraw text arena, a 1024-byte temp buffer (start/pos/end), zeroed counters,
 * cleared EOF/comment flags, and the no-op chunk callback as default.
 * NOTE(review): the docs extraction dropped several lines — the function
 * signature, the token-pool creation (around lines 82-84), the error returns
 * inside the NULL/OOM checks (lines 77 and 108), the parse-errors array
 * setup (lines 115-117) and the chunk_ctx assignment (line 129). Confirm
 * against upstream lexbor before relying on this listing.
 */
72{
74 static const unsigned tmp_size = 1024;
75
76 if (tkz == NULL) {
78 }
79
80 /* Tokens. */
81
85 if (status != LXB_STATUS_OK) {
86 return status;
87 }
88
89 /* Cache for Tokens. */
90
91 tkz->cache = lxb_css_syntax_tokenizer_cache_create();
92 status = lxb_css_syntax_tokenizer_cache_init(tkz->cache, 128);
93 if (status != LXB_STATUS_OK) {
94 return status;
95 }
96
97 /* Memory for text. */
98
99 tkz->mraw = lexbor_mraw_create();
100 status = lexbor_mraw_init(tkz->mraw, 4096);
101 if (status != LXB_STATUS_OK) {
102 return status;
103 }
104
105 /* Temp */
106 tkz->start = lexbor_malloc(tmp_size);
107 if (tkz->start == NULL) {
109 }
110
111 tkz->pos = tkz->start;
112 tkz->end = tkz->start + tmp_size;
113
114 /* Parse errors */
118 if (status != LXB_STATUS_OK) {
119 return status;
120 }
121
122 tkz->offset = 0;
123 tkz->cache_pos = 0;
124 tkz->prepared = 0;
125
126 tkz->eof = false;
127 tkz->with_comment = false;
128 tkz->status = LXB_STATUS_OK;
130 tkz->chunk_cb = lxb_css_syntax_tokenizer_blank;
131
132 return LXB_STATUS_OK;
133}
134
/*
 * lxb_css_syntax_tokenizer_clean (index: tokenizer.c:136). Resets the
 * tokenizer for reuse WITHOUT freeing its buffers: cache emptied, counters
 * and offsets zeroed, EOF flag cleared, status reset, input window dropped,
 * and the temp write position rewound to the start of the temp buffer.
 * NOTE(review): the extraction dropped the signature (lines 135-136) and
 * what are presumably the token-pool and mraw clean calls (lines 138-139
 * and 141) — confirm against upstream.
 */
137{
140 lxb_css_syntax_tokenizer_cache_clean(tkz->cache);
142
143 tkz->offset = 0;
144 tkz->cache_pos = 0;
145 tkz->prepared = 0;
146
147 tkz->eof = false;
148 tkz->status = LXB_STATUS_OK;
149 tkz->in_begin = NULL;
150 tkz->in_end = NULL;
151 tkz->pos = tkz->start;
152
153 return LXB_STATUS_OK;
154}
155
/*
 * lxb_css_syntax_tokenizer_destroy (index: tokenizer.c:157). Releases every
 * resource owned by the tokenizer — token pool, cache, mraw arena, temp
 * buffer — and finally the tokenizer object itself. NULL-safe. Returns the
 * result of lexbor_free(tkz) (NULL per lexbor's convention — confirm in
 * memory.c), letting callers clear their pointer in one assignment.
 * NOTE(review): the extraction dropped the signature (lines 156-157) and a
 * statement at line 169 (likely the parse_errors array destroy — confirm).
 */
158{
159 if (tkz == NULL) {
160 return NULL;
161 }
162
163 if (tkz->tokens != NULL) {
164 tkz->tokens = lexbor_dobject_destroy(tkz->tokens, true);
165 tkz->cache = lxb_css_syntax_tokenizer_cache_destroy(tkz->cache);
166 }
167
168 tkz->mraw = lexbor_mraw_destroy(tkz->mraw, true);
170
171 if (tkz->start != NULL) {
172 tkz->start = lexbor_free(tkz->start);
173 }
174
175 return lexbor_free(tkz);
176}
177
/*
 * Allocate a new, empty token cache.
 * NOTE(review): the extraction dropped the return-type line (178) and the
 * body statement (line 181). Per the index this returns a
 * lxb_css_syntax_tokenizer_cache_t *, most likely via lexbor_calloc of one
 * struct — confirm against upstream lexbor.
 */
179lxb_css_syntax_tokenizer_cache_create(void)
180{
182}
183
/*
 * Initialize a token cache: zero length, capacity of `size` entries, and a
 * heap-allocated array of `size` token pointers.
 * Returns LXB_STATUS_OK on success.
 * NOTE(review): the extraction dropped the error return inside the NULL
 * check (line 193, presumably LXB_STATUS_ERROR_MEMORY_ALLOCATION — confirm).
 */
184static lxb_status_t
185lxb_css_syntax_tokenizer_cache_init(lxb_css_syntax_tokenizer_cache_t *cache,
186 size_t size)
187{
188 cache->length = 0;
189 cache->size = size;
190
191 cache->list = lexbor_malloc(sizeof(lxb_css_syntax_token_t *) * size);
192 if (cache->list == NULL) {
194 }
195
196 return LXB_STATUS_OK;
197}
198
199static void
200lxb_css_syntax_tokenizer_cache_clean(lxb_css_syntax_tokenizer_cache_t *cache)
201{
202 if (cache != NULL) {
203 cache->length = 0;
204 }
205}
206
/*
 * Free a token cache and its pointer array. NULL-safe. Returns the result
 * of lexbor_free(cache) (NULL per lexbor's convention — confirm in
 * memory.c) so callers can reset their pointer in one assignment.
 * NOTE(review): the extraction dropped the return-type line (207).
 */
208lxb_css_syntax_tokenizer_cache_destroy(lxb_css_syntax_tokenizer_cache_t *cache)
209{
210 if (cache == NULL) {
211 return NULL;
212 }
213
214 if (cache->list) {
215 lexbor_free(cache->list);
216 }
217
218 return lexbor_free(cache);
219}
220
/*
 * Grow the cache's pointer array to hold `length + up_to` entries, keeping
 * the realloc result in a temporary so the original array survives a failed
 * reallocation. Returns the (possibly moved) array, or NULL on a NULL cache
 * or allocation failure. Note the new capacity is computed from the current
 * *length*, not the current capacity, so this can also shrink `size` when
 * the cache is mostly empty.
 * NOTE(review): the extraction dropped the return-type line (221) and the
 * declaration of `list` (line 226, presumably lxb_css_syntax_token_t **).
 */
222lxb_css_syntax_tokenizer_cache_expand(lxb_css_syntax_tokenizer_cache_t *cache,
223 size_t up_to)
224{
225 size_t new_size;
227
228 if (cache == NULL) {
229 return NULL;
230 }
231
232 new_size = cache->length + up_to;
233 list = lexbor_realloc(cache->list,
234 sizeof(lxb_css_syntax_token_t *) * new_size);
235 if (list == NULL) {
236 return NULL;
237 }
238
239 cache->list = list;
240 cache->size = new_size;
241
242 return list;
243}
244
/*
 * Append `value` to the cache, expanding capacity by 128 slots when full.
 * Returns LXB_STATUS_OK on success. (Index signature:
 * lxb_css_syntax_tokenizer_cache_push(cache, value), tokenizer.c:246.)
 * NOTE(review): the extraction dropped the signature (lines 245-247) and
 * the error return on failed expansion (line 251, presumably
 * LXB_STATUS_ERROR_MEMORY_ALLOCATION — confirm).
 */
248{
249 if (cache->length >= cache->size) {
250 if ((lxb_css_syntax_tokenizer_cache_expand(cache, 128) == NULL)) {
252 }
253 }
254
255 cache->list[ cache->length ] = value;
256 cache->length++;
257
258 return LXB_STATUS_OK;
259}
260
261
262static lxb_status_t
263lxb_css_syntax_tokenizer_blank(lxb_css_syntax_tokenizer_t *tkz,
264 const lxb_char_t **data, const lxb_char_t **end,
265 void *ctx)
266{
267 return LXB_STATUS_OK;
268}
269
/*
 * lxb_css_syntax_tokenizer_chunk (index: tokenizer.c:271). The visible body
 * only reports the tokenizer's current status; `data` and `size` are not
 * used in the lines shown.
 * NOTE(review): the extraction dropped the return-type and name lines
 * (270-271) — confirm upstream whether the function really ignores its
 * input or whether body statements were lost as well.
 */
272 const lxb_char_t *data, size_t size)
273{
274 return tkz->status;
275}
276
279{
282 const lxb_char_t *begin, *end;
283
284 begin = tkz->in_begin;
285 end = tkz->in_end;
286
287 if (tkz->prepared != 0) {
288 if (tkz->cache_pos < tkz->prepared) {
289 token = tkz->cache->list[tkz->prepared - 1];
290
292 if (status != LXB_STATUS_OK) {
293 return NULL;
294 }
295 }
296
297 token = tkz->cache->list[tkz->prepared];
298 token->offset = tkz->offset;
299
300 tkz->prepared += 1;
301
302 if (tkz->prepared >= tkz->cache->length) {
303 tkz->prepared = 0;
304 }
305
306 if (lxb_css_syntax_token_base(token)->length != 0) {
307 tkz->offset += lxb_css_syntax_token_base(token)->length;
308 token->cloned = false;
309 return token;
310 }
311
312 if (begin >= end) {
314 if (status != LXB_STATUS_OK) {
315 return NULL;
316 }
317
318 if (begin >= end) {
319 lxb_css_syntax_token_base(token)->length = 1;
320 goto done;
321 }
322 }
323
324 if (lxb_css_syntax_token_delim(token)->character == '-') {
326 }
327 else {
329 }
330
331 goto done;
332 }
333
334 if (tkz->cache_pos < tkz->cache->length) {
335 token = tkz->cache->list[tkz->cache->length - 1];
336
338 if (status != LXB_STATUS_OK) {
339 return NULL;
340 }
341 }
342
344 if (token == NULL) {
345 return NULL;
346 }
347
348 token->offset = tkz->offset;
349
350again:
351
352 if (begin >= end) {
354 if (status != LXB_STATUS_OK) {
355 return NULL;
356 }
357
358 if (begin >= end) {
360
361 lxb_css_syntax_token_base(token)->begin = begin;
362 lxb_css_syntax_token_base(token)->length = 0;
363
364 token->cloned = false;
365
366 return token;
367 }
368 }
369
370 begin = lxb_css_syntax_state_res_map[*begin](tkz, token, begin, end);
371
372done:
373
374 token->cloned = false;
375
376 if (begin == NULL) {
377 return NULL;
378 }
379
380 tkz->in_begin = begin;
381 tkz->offset += lxb_css_syntax_token_base(token)->length;
382
383 if (token->type == LXB_CSS_SYNTAX_TOKEN_COMMENT && !tkz->with_comment) {
384 end = tkz->in_end;
385 goto again;
386 }
387
388 return token;
389}
390
/*
 * lxb_css_syntax_tokenizer_next_chunk (index: tokenizer.c:392). Pulls the
 * next input chunk through the user-supplied chunk callback. If the
 * callback fails, its status is stored and returned. If it succeeds but
 * yields no data (*data >= *end), the window collapses to the saved
 * position and `eof` is raised; otherwise in_begin/in_end are pointed at
 * the freshly delivered chunk. Once `eof` is set, the whole function is a
 * no-op returning LXB_STATUS_OK.
 * NOTE(review): the extraction dropped the return-type and name lines
 * (391-392).
 */
393 const lxb_char_t **data, const lxb_char_t **end)
394{
395 const lxb_char_t *begin;
396
397 if (tkz->eof == false) {
398 begin = *data;
399
400 tkz->status = tkz->chunk_cb(tkz, data, end, tkz->chunk_ctx);
401 if (tkz->status != LXB_STATUS_OK) {
402 return tkz->status;
403 }
404
405 if (*data >= *end) {
406 *data = begin;
407 *end = begin;
408
409 tkz->in_begin = begin;
410 tkz->in_end = begin;
411
412 tkz->eof = true;
413 }
414 else {
415 tkz->in_begin = *data;
416 tkz->in_end = *end;
417 }
418 }
419
420 return LXB_STATUS_OK;
421}
422
423bool
425{
426 const lxb_char_t *p, *end;
428
429 if (tkz->cache_pos + 1 < tkz->cache->length) {
430 token = tkz->cache->list[tkz->cache_pos + 1];
431
433 if (tkz->cache_pos + 2 < tkz->cache->length) {
434 token = tkz->cache->list[tkz->cache_pos + 2];
435
436 return token->type == LXB_CSS_SYNTAX_TOKEN_COLON;
437 }
438 }
439 else if (token->type == LXB_CSS_SYNTAX_TOKEN_COLON) {
440 return true;
441 }
442
443 return false;
444 }
445
446 p = tkz->in_begin;
447 end = tkz->in_end;
448
449 do {
450 if (p >= end) {
451 token = lxb_css_syntax_token_next(tkz);
452 if (token == NULL) {
453 return false;
454 }
455
457 token = lxb_css_syntax_token_next(tkz);
458 if (token == NULL) {
459 return false;
460 }
461 }
462
463 return token->type == LXB_CSS_SYNTAX_TOKEN_COLON;
464 }
465
466 switch (*p) {
467 case 0x3A:
468 return true;
469
470 case 0x0D:
471 case 0x0C:
472 case 0x09:
473 case 0x20:
474 case 0x0A:
475 p++;
476 break;
477
478 default:
479 return false;
480 }
481 }
482 while (true);
483}
484
485bool
488 const lxb_char_t stop_ch)
489{
490 const lxb_char_t *p, *end;
492
493 static const size_t length = sizeof(lxb_css_syntax_tokenizer_important) - 1;
494
495 p = tkz->in_begin;
496 end = tkz->in_end;
497
498 if (tkz->cache_pos + 1 < tkz->cache->length) {
499 token = tkz->cache->list[tkz->cache_pos + 1];
500
501 if (token->type == LXB_CSS_SYNTAX_TOKEN_IDENT) {
502 return false;
503 }
504
505 if (!(lxb_css_syntax_token_ident(token)->length == length
507 lxb_css_syntax_tokenizer_important,
508 length)))
509 {
510 return false;
511 }
512
513 if (tkz->cache_pos + 2 < tkz->cache->length) {
514 token = tkz->cache->list[tkz->cache_pos + 2];
515
517 if (tkz->cache_pos + 3 >= tkz->cache->length) {
518 return lxb_css_syntax_tokenizer_lookup_important_end(tkz,
519 p, end, stop_ch, stop, false);
520 }
521
522 token = tkz->cache->list[tkz->cache_pos + 3];
523 }
524
525 return (token->type == LXB_CSS_SYNTAX_TOKEN_SEMICOLON
526 || token->type == stop
527 || token->type == LXB_CSS_SYNTAX_TOKEN__EOF);
528 }
529
530 return lxb_css_syntax_tokenizer_lookup_important_end(tkz, p, end,
531 stop_ch, stop, false);
532 }
533
534 return lxb_css_syntax_tokenizer_lookup_important_ch(tkz, p, end, stop_ch,
535 stop, false);
536}
537
/*
 * Case-insensitively match the raw input at `p` against the word
 * "important" (via the lowercase translation table). If the buffer runs out
 * mid-word, fall back to the token-based lookahead; on a full match,
 * continue scanning for the terminator in
 * lxb_css_syntax_tokenizer_lookup_important_end().
 * NOTE(review): the `stop` parameter line (543) was dropped by the
 * extraction — a lxb_css_syntax_token_type_t, judging by the callees.
 */
538static bool
539lxb_css_syntax_tokenizer_lookup_important_ch(lxb_css_syntax_tokenizer_t *tkz,
540 const lxb_char_t *p,
541 const lxb_char_t *end,
542 const lxb_char_t stop_ch,
544 bool skip_first)
545{
546 const lxb_char_t *imp;
547
548 imp = lxb_css_syntax_tokenizer_important;
549
550 do {
551 if (p >= end) {
552 return lxb_css_syntax_tokenizer_lookup_important_tokens(tkz, stop,
553 skip_first);
554 }
555
556 if (lexbor_str_res_map_lowercase[*p++] != *imp++) {
557 return false;
558 }
559 }
560 while (*imp != 0x00);
561
562 return lxb_css_syntax_tokenizer_lookup_important_end(tkz, p, end, stop_ch,
563 stop, skip_first);
564}
565
/*
 * After "important" has been matched, skip whitespace (CR, FF, TAB, SPACE,
 * LF) and decide whether the declaration really ends here: true on ';'
 * (0x3B) or when the next character equals a non-zero `stop_ch`; false on
 * anything else. If the raw buffer is exhausted first, defer to the
 * token-based lookahead.
 * NOTE(review): the `stop` parameter line (571) was dropped by the
 * extraction — a lxb_css_syntax_token_type_t, judging by the callee.
 */
566static bool
567lxb_css_syntax_tokenizer_lookup_important_end(lxb_css_syntax_tokenizer_t *tkz,
568 const lxb_char_t *p,
569 const lxb_char_t *end,
570 const lxb_char_t stop_ch,
572 bool skip_first)
573{
574 do {
575 if (p >= end) {
576 return lxb_css_syntax_tokenizer_lookup_important_tokens(tkz, stop,
577 skip_first);
578 }
579
580 switch (*p) {
581 case 0x3B:
582 return true;
583
584 case 0x0D:
585 case 0x0C:
586 case 0x09:
587 case 0x20:
588 case 0x0A:
589 p++;
590 break;
591
592 default:
593 return (stop_ch != 0x00 && stop_ch == *p);
594 }
595 }
596 while (true);
597}
598
599static bool
600lxb_css_syntax_tokenizer_lookup_important_tokens(lxb_css_syntax_tokenizer_t *tkz,
602 bool skip_first)
603{
605
606 static const size_t length = sizeof(lxb_css_syntax_tokenizer_important) - 1;
607
608 if (skip_first) {
610 if (next == NULL) {
611 return false;
612 }
613 }
614
616 if (next == NULL) {
617 return false;
618 }
619
620 if (next->type != LXB_CSS_SYNTAX_TOKEN_IDENT) {
621 return false;
622 }
623
624 if (!(lxb_css_syntax_token_ident(next)->length == length
626 lxb_css_syntax_tokenizer_important,
627 length)))
628 {
629 return false;
630 }
631
633 if (next == NULL) {
634 return false;
635 }
636
639 if (next == NULL) {
640 return false;
641 }
642 }
643
644 return (next->type == LXB_CSS_SYNTAX_TOKEN_SEMICOLON
645 || next->type == stop || next->type == LXB_CSS_SYNTAX_TOKEN__EOF);
646}
647
648bool
651 const lxb_char_t stop_ch)
652{
654 const lxb_char_t *p, *end;
655
656 if (tkz->cache_pos + 1 < tkz->cache->length) {
657 token = tkz->cache->list[tkz->cache_pos + 1];
658
659 switch (token->type) {
661 if (lxb_css_syntax_token_delim(token)->character != '!') {
663 stop_ch);
664 }
665
666 return false;
667
669 return true;
670
671 default:
672 return token->type == stop_ch ||
674 }
675 }
676
677 p = tkz->in_begin;
678 end = tkz->in_end;
679
680 do {
681 if (p >= end) {
682 return lxb_css_syntax_tokenizer_lookup_important_tokens(tkz, stop,
683 true);
684 }
685
686 switch (*p) {
687 case 0x3B:
688 return true;
689
690 case 0x21:
691 p++;
692 return lxb_css_syntax_tokenizer_lookup_important_ch(tkz, p, end,
693 stop_ch, stop, true);
694
695 default:
696 return (stop_ch != 0x00 && stop_ch == *p);
697 }
698 }
699 while (true);
700}
701
702/*
703 * No inline functions for ABI.
704 */
void lexbor_array_obj_clean(lexbor_array_obj_t *array)
Definition array_obj.c:42
lexbor_array_obj_t * lexbor_array_obj_create(void)
Definition array_obj.c:11
lxb_status_t lexbor_array_obj_init(lexbor_array_obj_t *array, size_t size, size_t struct_size)
Definition array_obj.c:17
lexbor_array_obj_t * lexbor_array_obj_destroy(lexbor_array_obj_t *array, bool self_destroy)
Definition array_obj.c:50
@ LXB_STATUS_ERROR_MEMORY_ALLOCATION
Definition base.h:51
@ LXB_STATUS_ERROR_OBJECT_IS_NULL
Definition base.h:52
@ LXB_STATUS_OK
Definition base.h:49
struct lxb_css_syntax_token lxb_css_syntax_token_t
Definition base.h:46
struct lxb_css_syntax_tokenizer lxb_css_syntax_tokenizer_t
Definition base.h:45
const lxb_char_t * lxb_css_syntax_state_minus_process(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:902
const lxb_char_t * lxb_css_syntax_state_plus_process(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:798
lxb_status_t lxb_css_syntax_token_string_make(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token)
Definition token.c:128
lxb_css_syntax_token_t * lxb_css_syntax_token_cached_create(lxb_css_syntax_tokenizer_t *tkz)
Definition token.c:165
lxb_css_syntax_token_t * lxb_css_syntax_token_next(lxb_css_syntax_tokenizer_t *tkz)
Definition token.c:57
#define lxb_css_syntax_token_ident(token)
Definition token.h:21
#define lxb_css_syntax_token_base(token)
Definition token.h:20
lxb_css_syntax_token_type_t
Definition token.h:68
@ LXB_CSS_SYNTAX_TOKEN__EOF
Definition token.h:101
@ LXB_CSS_SYNTAX_TOKEN_DELIM
Definition token.h:87
@ LXB_CSS_SYNTAX_TOKEN_COLON
Definition token.h:92
@ LXB_CSS_SYNTAX_TOKEN_IDENT
Definition token.h:72
@ LXB_CSS_SYNTAX_TOKEN_WHITESPACE
Definition token.h:81
@ LXB_CSS_SYNTAX_TOKEN_COMMENT
Definition token.h:80
@ LXB_CSS_SYNTAX_TOKEN_SEMICOLON
Definition token.h:93
#define lxb_css_syntax_token_delim(token)
Definition token.h:29
@ LXB_CSS_SYNTAX_TOKENIZER_OPT_UNDEF
Definition tokenizer.h:33
lxb_inline lxb_status_t lxb_css_syntax_tokenizer_status(lxb_css_syntax_tokenizer_t *tkz)
Definition tokenizer.h:110
#define LXB_API
Definition def.h:48
DNS_STATUS status
Definition dns_win32.c:49
lxb_status_t lexbor_dobject_init(lexbor_dobject_t *dobject, size_t chunk_size, size_t struct_size)
Definition dobject.c:22
void lexbor_dobject_clean(lexbor_dobject_t *dobject)
Definition dobject.c:64
lexbor_dobject_t * lexbor_dobject_destroy(lexbor_dobject_t *dobject, bool destroy_self)
Definition dobject.c:75
lexbor_dobject_t * lexbor_dobject_create(void)
Definition dobject.c:16
bool lxb_css_syntax_tokenizer_lookup_important(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_type_t stop, const lxb_char_t stop_ch)
Definition tokenizer.c:486
lxb_css_syntax_tokenizer_t * lxb_css_syntax_tokenizer_create(void)
Definition tokenizer.c:65
lxb_css_syntax_tokenizer_t * lxb_css_syntax_tokenizer_destroy(lxb_css_syntax_tokenizer_t *tkz)
Definition tokenizer.c:157
LXB_API lxb_status_t lxb_css_syntax_tokenizer_cache_push(lxb_css_syntax_tokenizer_cache_t *cache, lxb_css_syntax_token_t *value)
Definition tokenizer.c:246
lxb_status_t lxb_css_syntax_tokenizer_init(lxb_css_syntax_tokenizer_t *tkz)
Definition tokenizer.c:71
bool lxb_css_syntax_tokenizer_lookup_colon(lxb_css_syntax_tokenizer_t *tkz)
Definition tokenizer.c:424
lxb_status_t lxb_css_syntax_tokenizer_status_noi(lxb_css_syntax_tokenizer_t *tkz)
Definition tokenizer.c:706
lxb_css_syntax_token_t * lxb_css_syntax_tokenizer_token(lxb_css_syntax_tokenizer_t *tkz)
Definition tokenizer.c:278
lxb_status_t lxb_css_syntax_tokenizer_chunk(lxb_css_syntax_tokenizer_t *tkz, const lxb_char_t *data, size_t size)
Definition tokenizer.c:271
lxb_status_t lxb_css_syntax_tokenizer_next_chunk(lxb_css_syntax_tokenizer_t *tkz, const lxb_char_t **data, const lxb_char_t **end)
Definition tokenizer.c:392
lxb_status_t lxb_css_syntax_tokenizer_clean(lxb_css_syntax_tokenizer_t *tkz)
Definition tokenizer.c:136
bool lxb_css_syntax_tokenizer_lookup_declaration_ws_end(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_type_t stop, const lxb_char_t stop_ch)
Definition tokenizer.c:649
int begin
Definition eaw_table.h:20
new_type size
Definition ffi.c:4365
#define NULL
Definition gdcache.h:45
LXB_API void * lexbor_realloc(void *dst, size_t size)
Definition memory.c:21
LXB_API void * lexbor_free(void *dst)
Definition memory.c:33
LXB_API void * lexbor_malloc(size_t size)
Definition memory.c:15
LXB_API void * lexbor_calloc(size_t num, size_t size)
Definition memory.c:27
#define next(ls)
Definition minilua.c:2661
lexbor_mraw_t * lexbor_mraw_create(void)
Definition mraw.c:32
void lexbor_mraw_clean(lexbor_mraw_t *mraw)
Definition mraw.c:76
lxb_status_t lexbor_mraw_init(lexbor_mraw_t *mraw, size_t chunk_size)
Definition mraw.c:38
lexbor_mraw_t * lexbor_mraw_destroy(lexbor_mraw_t *mraw, bool destroy_self)
Definition mraw.c:87
unsigned const char * end
Definition php_ffi.h:51
zend_constant * data
p
Definition session.c:1105
bool lexbor_str_data_ncasecmp(const lxb_char_t *first, const lxb_char_t *sec, size_t size)
Definition str.c:435
lxb_css_syntax_token_type_t type
Definition token.h:192
uintptr_t offset
Definition token.h:193
lxb_css_syntax_token_t ** list
Definition tokenizer.h:37
const lxb_char_t * end
Definition tokenizer.h:65
lexbor_array_obj_t * parse_errors
Definition tokenizer.h:47
lexbor_mraw_t * mraw
Definition tokenizer.h:57
const lxb_char_t * in_begin
Definition tokenizer.h:49
lexbor_dobject_t * tokens
Definition tokenizer.h:45
lxb_css_syntax_tokenizer_cache_t * cache
Definition tokenizer.h:44
const lxb_char_t * in_end
Definition tokenizer.h:50
lxb_css_syntax_tokenizer_chunk_f chunk_cb
Definition tokenizer.h:59
unsigned int lxb_status_t
Definition types.h:28
unsigned char lxb_char_t
Definition types.h:27
value