php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
state_script.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
9
10#define LEXBOR_STR_RES_ALPHA_CHARACTER
11#define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
12#include "lexbor/core/str_res.h"
13
14#include "lexbor/core/str_res.h"
15
16
17const lxb_tag_data_t *
19 const lxb_char_t *name, size_t length);
20
21
22static const lxb_char_t *
23lxb_html_tokenizer_state_script_data(lxb_html_tokenizer_t *tkz,
24 const lxb_char_t *data,
25 const lxb_char_t *end);
26
27static const lxb_char_t *
28lxb_html_tokenizer_state_script_data_less_than_sign(lxb_html_tokenizer_t *tkz,
29 const lxb_char_t *data,
30 const lxb_char_t *end);
31
32static const lxb_char_t *
33lxb_html_tokenizer_state_script_data_end_tag_open(lxb_html_tokenizer_t *tkz,
34 const lxb_char_t *data,
35 const lxb_char_t *end);
36
37static const lxb_char_t *
38lxb_html_tokenizer_state_script_data_end_tag_name(lxb_html_tokenizer_t *tkz,
39 const lxb_char_t *data,
40 const lxb_char_t *end);
41
42static const lxb_char_t *
43lxb_html_tokenizer_state_script_data_escape_start(lxb_html_tokenizer_t *tkz,
44 const lxb_char_t *data,
45 const lxb_char_t *end);
46
47static const lxb_char_t *
48lxb_html_tokenizer_state_script_data_escape_start_dash(
50 const lxb_char_t *data,
51 const lxb_char_t *end);
52
53static const lxb_char_t *
54lxb_html_tokenizer_state_script_data_escaped(lxb_html_tokenizer_t *tkz,
55 const lxb_char_t *data,
56 const lxb_char_t *end);
57
58static const lxb_char_t *
59lxb_html_tokenizer_state_script_data_escaped_dash(lxb_html_tokenizer_t *tkz,
60 const lxb_char_t *data,
61 const lxb_char_t *end);
62
63static const lxb_char_t *
64lxb_html_tokenizer_state_script_data_escaped_dash_dash(
66 const lxb_char_t *data,
67 const lxb_char_t *end);
68
69static const lxb_char_t *
70lxb_html_tokenizer_state_script_data_escaped_less_than_sign(
72 const lxb_char_t *data,
73 const lxb_char_t *end);
74
75static const lxb_char_t *
76lxb_html_tokenizer_state_script_data_escaped_end_tag_open(
78 const lxb_char_t *data,
79 const lxb_char_t *end);
80
81static const lxb_char_t *
82lxb_html_tokenizer_state_script_data_escaped_end_tag_name(
84 const lxb_char_t *data,
85 const lxb_char_t *end);
86
87static const lxb_char_t *
88lxb_html_tokenizer_state_script_data_double_escape_start(
90 const lxb_char_t *data,
91 const lxb_char_t *end);
92
93static const lxb_char_t *
94lxb_html_tokenizer_state_script_data_double_escaped(lxb_html_tokenizer_t *tkz,
95 const lxb_char_t *data,
96 const lxb_char_t *end);
97
98static const lxb_char_t *
99lxb_html_tokenizer_state_script_data_double_escaped_dash(
101 const lxb_char_t *data,
102 const lxb_char_t *end);
103
104static const lxb_char_t *
105lxb_html_tokenizer_state_script_data_double_escaped_dash_dash(
107 const lxb_char_t *data,
108 const lxb_char_t *end);
109
110static const lxb_char_t *
111lxb_html_tokenizer_state_script_data_double_escaped_less_than_sign(
113 const lxb_char_t *data,
114 const lxb_char_t *end);
115
116static const lxb_char_t *
117lxb_html_tokenizer_state_script_data_double_escaped_end_tag_open(
119 const lxb_char_t *data,
120 const lxb_char_t *end);
121
122static const lxb_char_t *
123lxb_html_tokenizer_state_script_data_double_escape_end(
125 const lxb_char_t *data,
126 const lxb_char_t *end);
127
128
129/*
130 * Helper function. Not in the specification. For 12.2.5.4 Script data state
131 */
132const lxb_char_t *
134 const lxb_char_t *data,
135 const lxb_char_t *end)
136{
137 if (tkz->is_eof == false) {
139 }
140
141 tkz->state = lxb_html_tokenizer_state_script_data;
142
143 return data;
144}
145
146/*
147 * 12.2.5.4 Script data state
148 */
149static const lxb_char_t *
150lxb_html_tokenizer_state_script_data(lxb_html_tokenizer_t *tkz,
151 const lxb_char_t *data,
152 const lxb_char_t *end)
153{
155
156 while (data != end) {
157 switch (*data) {
158 /* U+003C LESS-THAN SIGN (<) */
159 case 0x3C:
162
163 tkz->state =
164 lxb_html_tokenizer_state_script_data_less_than_sign;
165
166 return (data + 1);
167
168 /* U+000D CARRIAGE RETURN (CR) */
169 case 0x0D:
170 if (++data >= end) {
172
174 tkz->state_return = lxb_html_tokenizer_state_script_data;
175
176 return data;
177 }
178
180 tkz->pos[-1] = 0x0A;
181
183
184 if (*data != 0x0A) {
186 data--;
187 }
188
189 break;
190
191 /*
192 * U+0000 NULL
193 * EOF
194 */
195 case 0x00:
197
198 if (tkz->is_eof) {
199 if (tkz->token->begin != NULL) {
201 }
202
203 tkz->token->tag_id = LXB_TAG__TEXT;
204
207
208 return end;
209 }
210
213
216 break;
217
218 default:
219 break;
220 }
221
222 data++;
223 }
224
226
227 return data;
228}
229
230/*
231 * 12.2.5.15 Script data less-than sign state
232 */
233static const lxb_char_t *
234lxb_html_tokenizer_state_script_data_less_than_sign(lxb_html_tokenizer_t *tkz,
235 const lxb_char_t *data,
236 const lxb_char_t *end)
237{
238 switch (*data) {
239 /* U+002F SOLIDUS (/) */
240 case 0x2F:
241 tkz->state = lxb_html_tokenizer_state_script_data_end_tag_open;
242
243 return (data + 1);
244
245 /* U+0021 EXCLAMATION MARK (!) */
246 case 0x21:
247 tkz->state = lxb_html_tokenizer_state_script_data_escape_start;
248
249 return (data + 1);
250
251 default:
252 tkz->state = lxb_html_tokenizer_state_script_data;
253
254 break;
255 }
256
257 return data;
258}
259
260/*
261 * 12.2.5.16 Script data end tag open state
262 */
263static const lxb_char_t *
264lxb_html_tokenizer_state_script_data_end_tag_open(lxb_html_tokenizer_t *tkz,
265 const lxb_char_t *data,
266 const lxb_char_t *end)
267{
268 if (lexbor_str_res_alpha_character[*data] != LEXBOR_STR_RES_SLIP) {
269 tkz->entity_start = (tkz->pos - 1) - tkz->start;
270 tkz->temp = data;
271
272 tkz->state = lxb_html_tokenizer_state_script_data_end_tag_name;
273 }
274 else {
275 tkz->state = lxb_html_tokenizer_state_script_data;
276 }
277
279
280 return data;
281}
282
283/*
284 * 12.2.5.17 Script data end tag name state
285 */
286static const lxb_char_t *
287lxb_html_tokenizer_state_script_data_end_tag_name(lxb_html_tokenizer_t *tkz,
288 const lxb_char_t *data,
289 const lxb_char_t *end)
290{
292
293 while (data != end) {
294 switch (*data) {
295 /*
296 * U+0009 CHARACTER TABULATION (tab)
297 * U+000A LINE FEED (LF)
298 * U+000C FORM FEED (FF)
299 * U+000D CARRIAGE RETURN (CR)
300 * U+0020 SPACE
301 */
302 case 0x09:
303 case 0x0A:
304 case 0x0C:
305 case 0x0D:
306 case 0x20:
309 tkz->pos);
310
311 if (tkz->tmp_tag_id != tkz->token->tag_id) {
312 goto anything_else;
313 }
314
316
317 goto done;
318
319 /* U+002F SOLIDUS (/) */
320 case 0x2F:
323 tkz->pos);
324
325 if (tkz->tmp_tag_id != tkz->token->tag_id) {
326 goto anything_else;
327 }
328
330
331 goto done;
332
333 /* U+003E GREATER-THAN SIGN (>) */
334 case 0x3E:
337 tkz->pos);
338
339 if (tkz->tmp_tag_id != tkz->token->tag_id) {
340 goto anything_else;
341 }
342
344
345 /* Emit text token */
346 tkz->token->tag_id = LXB_TAG__TEXT;
347 tkz->pos = &tkz->start[tkz->entity_start];
348
351
352 /* Init close token */
353 tkz->token->tag_id = tkz->tmp_tag_id;
354 tkz->token->begin = tkz->temp;
355 tkz->token->end = data;
357
358 /* Emit close token */
360
361 return (data + 1);
362
363 default:
364 if (lexbor_str_res_alpha_character[*data]
366 {
367 goto anything_else;
368 }
369
370 break;
371 }
372
373 data++;
374 }
375
377
378 return data;
379
380anything_else:
381
382 tkz->state = lxb_html_tokenizer_state_script_data;
383
384 return data;
385
386done:
387
388 /* Emit text token */
389 tkz->token->tag_id = LXB_TAG__TEXT;
390 tkz->pos = &tkz->start[tkz->entity_start];
391
394
395 /* Init close token */
396 tkz->token->tag_id = tkz->tmp_tag_id;
397 tkz->token->begin = tkz->temp;
398 tkz->token->end = data;
400
401 return (data + 1);
402}
403
404/*
405 * 12.2.5.18 Script data escape start state
406 */
407static const lxb_char_t *
408lxb_html_tokenizer_state_script_data_escape_start(lxb_html_tokenizer_t *tkz,
409 const lxb_char_t *data,
410 const lxb_char_t *end)
411{
412 /* U+002D HYPHEN-MINUS (-) */
413 if (*data == 0x2D) {
414 tkz->state = lxb_html_tokenizer_state_script_data_escape_start_dash;
415
416 return (data + 1);
417 }
418
420
421 tkz->state = lxb_html_tokenizer_state_script_data;
422
423 return data;
424}
425
426/*
427 * 12.2.5.19 Script data escape start dash state
428 */
429static const lxb_char_t *
430lxb_html_tokenizer_state_script_data_escape_start_dash(
432 const lxb_char_t *data,
433 const lxb_char_t *end)
434{
435 /* U+002D HYPHEN-MINUS (-) */
436 if (*data == 0x2D) {
438
439 tkz->state = lxb_html_tokenizer_state_script_data_escaped_dash_dash;
440
441 return (data + 1);
442 }
443
445
446 tkz->state = lxb_html_tokenizer_state_script_data;
447
448 return data;
449}
450
451/*
452 * 12.2.5.20 Script data escaped state
453 */
454static const lxb_char_t *
455lxb_html_tokenizer_state_script_data_escaped(lxb_html_tokenizer_t *tkz,
456 const lxb_char_t *data,
457 const lxb_char_t *end)
458{
460
461 while (data != end) {
462 switch (*data) {
463 /* U+002D HYPHEN-MINUS (-) */
464 case 0x2D:
466
467 tkz->state = lxb_html_tokenizer_state_script_data_escaped_dash;
468
469 return (data + 1);
470
471 /* U+003C LESS-THAN SIGN (<) */
472 case 0x3C:
475
476 tkz->state =
477 lxb_html_tokenizer_state_script_data_escaped_less_than_sign;
478
479 return (data + 1);
480
481 /* U+000D CARRIAGE RETURN (CR) */
482 case 0x0D:
483 if (++data >= end) {
485
487 tkz->state_return = lxb_html_tokenizer_state_script_data_escaped;
488
489 return data;
490 }
491
493 tkz->pos[-1] = 0x0A;
494
496
497 if (*data != 0x0A) {
499 data--;
500 }
501
502 break;
503
504 /*
505 * U+0000 NULL
506 * EOF
507 */
508 case 0x00:
510
511 if (tkz->is_eof) {
514
515 tkz->token->tag_id = LXB_TAG__TEXT;
516
520
521 return end;
522 }
523
526
529 break;
530
531 default:
532 break;
533 }
534
535 data++;
536 }
537
539
540 return data;
541}
542
543/*
544 * 12.2.5.21 Script data escaped dash state
545 */
546static const lxb_char_t *
547lxb_html_tokenizer_state_script_data_escaped_dash(lxb_html_tokenizer_t *tkz,
548 const lxb_char_t *data,
549 const lxb_char_t *end)
550{
551 switch (*data) {
552 /* U+002D HYPHEN-MINUS (-) */
553 case 0x2D:
555
556 tkz->state = lxb_html_tokenizer_state_script_data_escaped_dash_dash;
557
558 return (data + 1);
559
560 /* U+003C LESS-THAN SIGN (<) */
561 case 0x3C:
564
565 tkz->state =
566 lxb_html_tokenizer_state_script_data_escaped_less_than_sign;
567
568 return (data + 1);
569
570 /*
571 * U+0000 NULL
572 * EOF
573 */
574 case 0x00:
575 if (tkz->is_eof) {
578
579 tkz->token->tag_id = LXB_TAG__TEXT;
580
584
585 return end;
586 }
587
589
592
593 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
594
595 return (data + 1);
596
597 default:
598 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
599
600 return data;
601 }
602}
603
604/*
605 * 12.2.5.22 Script data escaped dash dash state
606 */
607static const lxb_char_t *
608lxb_html_tokenizer_state_script_data_escaped_dash_dash(lxb_html_tokenizer_t *tkz,
609 const lxb_char_t *data,
610 const lxb_char_t *end)
611{
612 switch (*data) {
613 /* U+002D HYPHEN-MINUS (-) */
614 case 0x2D:
616 return (data + 1);
617
618 /* U+003C LESS-THAN SIGN (<) */
619 case 0x3C:
622
623 tkz->state =
624 lxb_html_tokenizer_state_script_data_escaped_less_than_sign;
625
626 return (data + 1);
627
628 /* U+003E GREATER-THAN SIGN (>) */
629 case 0x3E:
630 tkz->state = lxb_html_tokenizer_state_script_data;
631 return data;
632
633 default:
634 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
635 return data;
636 }
637}
638
639/*
640 * 12.2.5.23 Script data escaped less-than sign state
641 */
642static const lxb_char_t *
643lxb_html_tokenizer_state_script_data_escaped_less_than_sign(
645 const lxb_char_t *data,
646 const lxb_char_t *end)
647{
648 /* U+002F SOLIDUS (/) */
649 if (*data == 0x2F) {
650 tkz->state = lxb_html_tokenizer_state_script_data_escaped_end_tag_open;
651
652 return (data + 1);
653 }
654
655 /* ASCII alpha */
656 if (lexbor_str_res_alpha_character[*data] != LEXBOR_STR_RES_SLIP) {
657 tkz->entity_start = tkz->pos - tkz->start;
658
659 tkz->state = lxb_html_tokenizer_state_script_data_double_escape_start;
660
661 return data;
662 }
663
664 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
665
666 return data;
667}
668
669/*
670 * 12.2.5.24 Script data escaped end tag open state
671 */
672static const lxb_char_t *
673lxb_html_tokenizer_state_script_data_escaped_end_tag_open(lxb_html_tokenizer_t *tkz,
674 const lxb_char_t *data,
675 const lxb_char_t *end)
676{
677 if (lexbor_str_res_alpha_character[*data] != LEXBOR_STR_RES_SLIP) {
678 tkz->temp = data;
679 tkz->entity_start = (tkz->pos - 1) - tkz->start;
680
681 tkz->state = lxb_html_tokenizer_state_script_data_escaped_end_tag_name;
682 }
683 else {
684 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
685 }
686
688
689 return data;
690}
691
692/*
693 * 12.2.5.25 Script data escaped end tag name state
694 */
695static const lxb_char_t *
696lxb_html_tokenizer_state_script_data_escaped_end_tag_name(
698 const lxb_char_t *data,
699 const lxb_char_t *end)
700{
702
703 while (data != end) {
704 switch (*data) {
705 /*
706 * U+0009 CHARACTER TABULATION (tab)
707 * U+000A LINE FEED (LF)
708 * U+000C FORM FEED (FF)
709 * U+000D CARRIAGE RETURN (CR)
710 * U+0020 SPACE
711 */
712 case 0x09:
713 case 0x0A:
714 case 0x0C:
715 case 0x0D:
716 case 0x20:
719 tkz->pos);
720
721 if (tkz->tmp_tag_id != tkz->token->tag_id) {
722 goto anything_else;
723 }
724
726
727 goto done;
728
729 /* U+002F SOLIDUS (/) */
730 case 0x2F:
733 tkz->pos);
734
735 if (tkz->tmp_tag_id != tkz->token->tag_id) {
736 goto anything_else;
737 }
738
740
741 goto done;
742
743 /* U+003E GREATER-THAN SIGN (>) */
744 case 0x3E:
747 tkz->pos);
748
749 if (tkz->tmp_tag_id != tkz->token->tag_id) {
750 goto anything_else;
751 }
752
754
755 /* Emit text token */
756 tkz->token->tag_id = LXB_TAG__TEXT;
757 tkz->pos = &tkz->start[tkz->entity_start];
758
761
762 /* Init close token */
763 tkz->token->tag_id = tkz->tmp_tag_id;
764 tkz->token->begin = tkz->temp;
765 tkz->token->end = data;
767
768 /* Emit close token */
770
771 return (data + 1);
772
773 default:
774 if (lexbor_str_res_alpha_character[*data]
776 {
778 goto anything_else;
779 }
780
781 break;
782 }
783
784 data++;
785 }
786
788
789 return data;
790
791anything_else:
792
793 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
794
795 return data;
796
797done:
798
799 /* Emit text token */
800 tkz->token->tag_id = LXB_TAG__TEXT;
801 tkz->pos = &tkz->start[tkz->entity_start];
802
805
806 /* Init close token */
807 tkz->token->tag_id = tkz->tmp_tag_id;
808 tkz->token->begin = tkz->temp;
809 tkz->token->end = data;
811
812 return (data + 1);
813}
814
815/*
816 * 12.2.5.26 Script data double escape start state
817 */
818static const lxb_char_t *
819lxb_html_tokenizer_state_script_data_double_escape_start(lxb_html_tokenizer_t *tkz,
820 const lxb_char_t *data,
821 const lxb_char_t *end)
822{
824
825 while (data != end) {
826 switch (*data) {
827 /*
828 * U+0009 CHARACTER TABULATION (tab)
829 * U+000A LINE FEED (LF)
830 * U+000C FORM FEED (FF)
831 * U+000D CARRIAGE RETURN (CR)
832 * U+0020 SPACE
833 * U+002F SOLIDUS (/)
834 * U+003E GREATER-THAN SIGN (>)
835 */
836 case 0x09:
837 case 0x0A:
838 case 0x0C:
839 case 0x0D:
840 case 0x20:
841 case 0x2F:
842 case 0x3E:
844
845 if ((tkz->pos - &tkz->start[tkz->entity_start]) == 6
847 (const lxb_char_t *) "script", 6))
848 {
849 tkz->state =
850 lxb_html_tokenizer_state_script_data_double_escaped;
851
852 return data;
853 }
854
855 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
856
857 return data;
858
859 default:
860 if (lexbor_str_res_alpha_character[*data]
862 {
864
865 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
866
867 return data;
868 }
869
870 break;
871 }
872
873 data++;
874 }
875
877
878 return data;
879}
880
881/*
882 * 12.2.5.27 Script data double escaped state
883 */
884static const lxb_char_t *
885lxb_html_tokenizer_state_script_data_double_escaped(lxb_html_tokenizer_t *tkz,
886 const lxb_char_t *data,
887 const lxb_char_t *end)
888{
890
891 while (data != end) {
892 switch (*data) {
893 /* U+002D HYPHEN-MINUS (-) */
894 case 0x2D:
896
897 tkz->state =
898 lxb_html_tokenizer_state_script_data_double_escaped_dash;
899
900 return (data + 1);
901
902 /* U+003C LESS-THAN SIGN (<) */
903 case 0x3C:
905
906 tkz->state =
907 lxb_html_tokenizer_state_script_data_double_escaped_less_than_sign;
908
909 return (data + 1);
910
911 /* U+000D CARRIAGE RETURN (CR) */
912 case 0x0D:
913 if (++data >= end) {
915
917 tkz->state_return = lxb_html_tokenizer_state_script_data_double_escaped;
918
919 return data;
920 }
921
923 tkz->pos[-1] = 0x0A;
924
926
927 if (*data != 0x0A) {
929 data--;
930 }
931
932 break;
933
934 /*
935 * U+0000 NULL
936 * EOF
937 */
938 case 0x00:
940
941 if (tkz->is_eof) {
944
945 tkz->token->tag_id = LXB_TAG__TEXT;
946
950
951 return end;
952 }
953
956
959 break;
960
961 default:
962 break;
963 }
964
965 data++;
966 }
967
969
970 return data;
971}
972
973/*
974 * 12.2.5.28 Script data double escaped dash state
975 */
976static const lxb_char_t *
977lxb_html_tokenizer_state_script_data_double_escaped_dash(lxb_html_tokenizer_t *tkz,
978 const lxb_char_t *data,
979 const lxb_char_t *end)
980{
981 switch (*data) {
982 /* U+002D HYPHEN-MINUS (-) */
983 case 0x2D:
985
986 tkz->state =
987 lxb_html_tokenizer_state_script_data_double_escaped_dash_dash;
988
989 return (data + 1);
990
991 /* U+003C LESS-THAN SIGN (<) */
992 case 0x3C:
994
995 tkz->state =
996 lxb_html_tokenizer_state_script_data_double_escaped_less_than_sign;
997
998 return (data + 1);
999
1000 /*
1001 * U+0000 NULL
1002 * EOF
1003 */
1004 case 0x00:
1005 if (tkz->is_eof) {
1008
1009 tkz->token->tag_id = LXB_TAG__TEXT;
1010
1014
1015 return end;
1016 }
1017
1019
1022
1023 tkz->state = lxb_html_tokenizer_state_script_data_double_escaped;
1024
1025 return (data + 1);
1026
1027 default:
1028 tkz->state = lxb_html_tokenizer_state_script_data_double_escaped;
1029
1030 return data;
1031 }
1032}
1033
1034/*
1035 * 12.2.5.29 Script data double escaped dash dash state
1036 */
1037static const lxb_char_t *
1038lxb_html_tokenizer_state_script_data_double_escaped_dash_dash(
1040 const lxb_char_t *data,
1041 const lxb_char_t *end)
1042{
1043 switch (*data) {
1044 /* U+002D HYPHEN-MINUS (-) */
1045 case 0x2D:
1047 return (data + 1);
1048
1049 /* U+003C LESS-THAN SIGN (<) */
1050 case 0x3C:
1052
1053 tkz->state =
1054 lxb_html_tokenizer_state_script_data_double_escaped_less_than_sign;
1055
1056 return (data + 1);
1057
1058 /* U+003E GREATER-THAN SIGN (>) */
1059 case 0x3E:
1061
1062 tkz->state = lxb_html_tokenizer_state_script_data;
1063
1064 return (data + 1);
1065
1066 /*
1067 * U+0000 NULL
1068 * EOF
1069 */
1070 case 0x00:
1071 if (tkz->is_eof) {
1074
1075 tkz->token->tag_id = LXB_TAG__TEXT;
1076
1080
1081 return end;
1082 }
1083
1085
1088
1089 tkz->state = lxb_html_tokenizer_state_script_data_double_escaped;
1090
1091 return (data + 1);
1092
1093 default:
1094 tkz->state = lxb_html_tokenizer_state_script_data_double_escaped;
1095
1096 return data;
1097 }
1098
1099 return data;
1100}
1101
1102/*
1103 * 12.2.5.30 Script data double escaped less-than sign state
1104 */
1105static const lxb_char_t *
1106lxb_html_tokenizer_state_script_data_double_escaped_less_than_sign(
1108 const lxb_char_t *data,
1109 const lxb_char_t *end)
1110{
1111 /* U+002F SOLIDUS (/) */
1112 if (*data == 0x2F) {
1113 tkz->state =
1114 lxb_html_tokenizer_state_script_data_double_escaped_end_tag_open;
1115
1116 return (data + 1);
1117 }
1118
1119 tkz->state = lxb_html_tokenizer_state_script_data_double_escaped;
1120
1121 return data;
1122}
1123
1124/*
1125 * 12.2.5.30.5 Helper function. Not in the specification.
1126 */
1127static const lxb_char_t *
1128lxb_html_tokenizer_state_script_data_double_escaped_end_tag_open(
1130 const lxb_char_t *data,
1131 const lxb_char_t *end)
1132{
1133 if (lexbor_str_res_alpha_character[*data] != LEXBOR_STR_RES_SLIP) {
1134 tkz->entity_start = (tkz->pos + 1) - tkz->start;
1135
1136 tkz->state = lxb_html_tokenizer_state_script_data_double_escape_end;
1137 }
1138 else {
1139 tkz->state = lxb_html_tokenizer_state_script_data_double_escaped;
1140 }
1141
1143
1144 return data;
1145}
1146
1147/*
1148 * 12.2.5.31 Script data double escape end state
1149 */
1150static const lxb_char_t *
1151lxb_html_tokenizer_state_script_data_double_escape_end(
1153 const lxb_char_t *data,
1154 const lxb_char_t *end)
1155{
1157
1158 while (data != end) {
1159 switch (*data) {
1160 /*
1161 * U+0009 CHARACTER TABULATION (tab)
1162 * U+000A LINE FEED (LF)
1163 * U+000C FORM FEED (FF)
1164 * U+000D CARRIAGE RETURN (CR)
1165 * U+0020 SPACE
1166 * U+002F SOLIDUS (/)
1167 * U+003E GREATER-THAN SIGN (>)
1168 */
1169 case 0x09:
1170 case 0x0A:
1171 case 0x0C:
1172 case 0x0D:
1173 case 0x20:
1174 case 0x2F:
1175 case 0x3E:
1177
1178 if ((tkz->pos - &tkz->start[tkz->entity_start]) == 6
1180 (const lxb_char_t *) "script", 6))
1181 {
1182 tkz->state = lxb_html_tokenizer_state_script_data_escaped;
1183 return data;
1184 }
1185
1186 tkz->state = lxb_html_tokenizer_state_script_data_double_escaped;
1187
1188 return data;
1189
1190 default:
1191 if (lexbor_str_res_alpha_character[*data]
1193 {
1195
1196 tkz->state = lxb_html_tokenizer_state_script_data_double_escaped;
1197 return data;
1198 }
1199
1200 break;
1201 }
1202
1203 data++;
1204 }
1205
1207
1208 return data;
1209}
#define NULL
Definition gdcache.h:45
struct lexbor_hash lexbor_hash_t
Definition hash.h:41
hash(string $algo, string $data, bool $binary=false, array $options=[])
Definition hash.stub.php:12
struct lxb_html_tokenizer lxb_html_tokenizer_t
Definition base.h:26
@ LXB_HTML_TOKEN_TYPE_CLOSE
Definition token.h:27
lxb_html_tokenizer_error_t * lxb_html_tokenizer_error_add(lexbor_array_obj_t *parse_errors, const lxb_char_t *pos, lxb_html_tokenizer_error_id_t id)
Definition error.c:11
@ LXB_HTML_TOKENIZER_ERROR_UNNUCH
Definition error.h:112
@ LXB_HTML_TOKENIZER_ERROR_EOINSCHTCOLITE
Definition error.h:52
const lxb_char_t * lxb_html_tokenizer_state_before_attribute_name(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:617
const lxb_char_t * lxb_html_tokenizer_state_data_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:204
const lxb_char_t * lxb_html_tokenizer_state_cr(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:1257
const lxb_char_t * lxb_html_tokenizer_state_self_closing_start_tag(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:1275
#define lxb_html_tokenizer_state_append_data_m(tkz, v_data)
Definition state.h:19
#define lxb_html_tokenizer_state_set_text(tkz)
Definition state.h:174
#define lxb_html_tokenizer_state_token_set_begin(tkz, v_begin)
Definition state.h:89
#define lxb_html_tokenizer_state_append_replace_m(tkz)
Definition state.h:37
#define lxb_html_tokenizer_state_token_done_m(tkz, v_end)
Definition state.h:157
#define lxb_html_tokenizer_state_set_tag_m(tkz, _start, _end)
Definition state.h:48
#define lxb_html_tokenizer_state_token_set_end(tkz, v_end)
Definition state.h:98
#define lxb_html_tokenizer_state_token_set_end_oef(tkz)
Definition state.h:108
#define lxb_html_tokenizer_state_begin_set(tkz, v_data)
Definition state.h:16
#define lxb_html_tokenizer_state_append_m(tkz, v_data, size)
Definition state.h:27
unsigned const char * end
Definition php_ffi.h:51
zend_constant * data
const lxb_char_t * lxb_html_tokenizer_state_script_data_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
const lxb_tag_data_t * lxb_tag_append_lower(lexbor_hash_t *hash, const lxb_char_t *name, size_t length)
Definition tag.c:41
bool lexbor_str_data_ncasecmp(const lxb_char_t *first, const lxb_char_t *sec, size_t size)
Definition str.c:435
#define LEXBOR_STR_RES_SLIP
Definition str_res.h:14
const lxb_char_t * end
Definition token.h:35
lxb_html_token_type_t type
Definition token.h:49
lxb_tag_id_t tag_id
Definition token.h:48
const lxb_char_t * begin
Definition token.h:34
lxb_html_token_t * token
Definition tokenizer.h:49
uintptr_t entity_start
Definition tokenizer.h:82
lxb_char_t * pos
Definition tokenizer.h:72
lxb_html_tokenizer_state_f state_return
Definition tokenizer.h:36
const lxb_char_t * temp
Definition tokenizer.h:68
lxb_html_tokenizer_state_f state
Definition tokenizer.h:35
lexbor_array_obj_t * parse_errors
Definition tokenizer.h:56
lxb_tag_id_t tmp_tag_id
Definition tokenizer.h:69
lxb_char_t * start
Definition tokenizer.h:71
const lxb_char_t * last
Definition tokenizer.h:75
@ LXB_TAG__TEXT
Definition const.h:26
unsigned char lxb_char_t
Definition types.h:27
zend_string * name