php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
state_rcdata.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
9
10#define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
11#define LEXBOR_STR_RES_ALPHA_CHARACTER
12#include "lexbor/core/str_res.h"
13
14
15const lxb_tag_data_t *
17 const lxb_char_t *name, size_t length);
18
19
/*
 * Forward declarations of the file-local (static) RCDATA state handlers
 * defined further down in this file. Each handler takes the tokenizer and the
 * [data, end) input pointers and returns a const lxb_char_t pointer.
 */
20static const lxb_char_t *
21lxb_html_tokenizer_state_rcdata(lxb_html_tokenizer_t *tkz,
22 const lxb_char_t *data,
23 const lxb_char_t *end);
24
25static const lxb_char_t *
26lxb_html_tokenizer_state_rcdata_less_than_sign(lxb_html_tokenizer_t *tkz,
27 const lxb_char_t *data,
28 const lxb_char_t *end);
29
30static const lxb_char_t *
31lxb_html_tokenizer_state_rcdata_end_tag_open(lxb_html_tokenizer_t *tkz,
32 const lxb_char_t *data,
33 const lxb_char_t *end);
34
35static const lxb_char_t *
36lxb_html_tokenizer_state_rcdata_end_tag_name(lxb_html_tokenizer_t *tkz,
37 const lxb_char_t *data,
38 const lxb_char_t *end);
39
40
41/*
42 * Helper function. Not in the specification. For 12.2.5.2 RCDATA state
43 */
44const lxb_char_t *
46 const lxb_char_t *data,
47 const lxb_char_t *end)
48{
49 if (tkz->is_eof == false) {
51 }
52
53 tkz->state = lxb_html_tokenizer_state_rcdata;
54
55 return data;
56}
57
58/*
59 * 12.2.5.2 RCDATA state
60 */
61static const lxb_char_t *
62lxb_html_tokenizer_state_rcdata(lxb_html_tokenizer_t *tkz,
63 const lxb_char_t *data,
64 const lxb_char_t *end)
65{
67
68 while (data != end) {
69 switch (*data) {
70 /* U+003C LESS-THAN SIGN (<) */
71 case 0x3C:
74
75 tkz->state = lxb_html_tokenizer_state_rcdata_less_than_sign;
76
77 return (data + 1);
78
79 /* U+0026 AMPERSAND (&) */
80 case 0x26:
82
84 tkz->state_return = lxb_html_tokenizer_state_rcdata;
85
86 return data + 1;
87
88 /* U+000D CARRIAGE RETURN (CR) */
89 case 0x0D:
90 if (++data >= end) {
92
94 tkz->state_return = lxb_html_tokenizer_state_rcdata;
95
96 return data;
97 }
98
100 tkz->pos[-1] = 0x0A;
101
103
104 if (*data != 0x0A) {
106 data--;
107 }
108
109 break;
110
111 /*
112 * U+0000 NULL
113 * EOF
114 */
115 case 0x00:
117
118 if (tkz->is_eof) {
119 if (tkz->token->begin != NULL) {
121 }
122
123 tkz->token->tag_id = LXB_TAG__TEXT;
124
127
128 return end;
129 }
130
133
136 break;
137
138 default:
139 break;
140 }
141
142 data++;
143 }
144
146
147 return data;
148}
149
150/*
151 * 12.2.5.9 RCDATA less-than sign state
 *
 * Selected by the RCDATA state when it meets '<' (0x3C); that state returns
 * the position just past the '<', so `data` here points at the byte that
 * follows it.
 *
 * tkz:  tokenizer whose `state` handler pointer is updated.
 * data: current input position; the byte at `data` is inspected.
 * end:  end of the input chunk; not consulted by this function.
 *       NOTE(review): `*data` is read unconditionally, so this presumably
 *       relies on the caller guaranteeing data < end - confirm.
 *
 * Returns the position from which the next state handler continues:
 * `data + 1` when a '/' was consumed, otherwise `data` unchanged.
152 */
153static const lxb_char_t *
154lxb_html_tokenizer_state_rcdata_less_than_sign(lxb_html_tokenizer_t *tkz,
155 const lxb_char_t *data,
156 const lxb_char_t *end)
157{
158 /* U+002F SOLIDUS (/) */
159 if (*data == 0x2F) {
160 tkz->state = lxb_html_tokenizer_state_rcdata_end_tag_open;
161
162 return (data + 1);
163 }
164
 /* Anything else: go back to the RCDATA state without consuming the byte. */
165 tkz->state = lxb_html_tokenizer_state_rcdata;
166
167 return data;
168}
169
170/*
171 * 12.2.5.10 RCDATA end tag open state
172 */
173static const lxb_char_t *
174lxb_html_tokenizer_state_rcdata_end_tag_open(lxb_html_tokenizer_t *tkz,
175 const lxb_char_t *data,
176 const lxb_char_t *end)
177{
178 if (lexbor_str_res_alpha_character[*data] != LEXBOR_STR_RES_SLIP) {
179 tkz->temp = data;
180 tkz->entity_start = (tkz->pos - 1) - tkz->start;
181
182 tkz->state = lxb_html_tokenizer_state_rcdata_end_tag_name;
183 }
184 else {
185 tkz->state = lxb_html_tokenizer_state_rcdata;
186 }
187
189
190 return data;
191}
192
193/*
194 * 12.2.5.11 RCDATA end tag name state
195 */
196static const lxb_char_t *
197lxb_html_tokenizer_state_rcdata_end_tag_name(lxb_html_tokenizer_t *tkz,
198 const lxb_char_t *data,
199 const lxb_char_t *end)
200{
202
203 while (data != end) {
204 switch (*data) {
205 /*
206 * U+0009 CHARACTER TABULATION (tab)
207 * U+000A LINE FEED (LF)
208 * U+000C FORM FEED (FF)
209 * U+000D CARRIAGE RETURN (CR)
210 * U+0020 SPACE
211 */
212 case 0x09:
213 case 0x0A:
214 case 0x0C:
215 case 0x0D:
216 case 0x20:
219 tkz->pos);
220
221 if (tkz->tmp_tag_id != tkz->token->tag_id) {
222 goto anything_else;
223 }
224
226 goto done;
227
228 /* U+002F SOLIDUS (/) */
229 case 0x2F:
232 tkz->pos);
233
234 if (tkz->tmp_tag_id != tkz->token->tag_id) {
235 goto anything_else;
236 }
237
239 goto done;
240
241 /* U+003E GREATER-THAN SIGN (>) */
242 case 0x3E:
245 tkz->pos);
246
247 if (tkz->tmp_tag_id != tkz->token->tag_id) {
248 goto anything_else;
249 }
250
252
253 /* Emit text token */
254 tkz->token->tag_id = LXB_TAG__TEXT;
255 tkz->pos = &tkz->start[tkz->entity_start];
256
259
260 /* Init close token */
261 tkz->token->tag_id = tkz->tmp_tag_id;
262 tkz->token->begin = tkz->temp;
263 tkz->token->end = data;
265
266 /* Emit close token */
268
269 return (data + 1);
270
271 default:
272 if (lexbor_str_res_alpha_character[*data]
274 {
276
277 goto anything_else;
278 }
279
280 break;
281 }
282
283 data++;
284 }
285
287
288 return data;
289
290anything_else:
291
292 tkz->state = lxb_html_tokenizer_state_rcdata;
293
294 return data;
295
296done:
297
298 /* Emit text token */
299 tkz->token->tag_id = LXB_TAG__TEXT;
300 tkz->pos = &tkz->start[tkz->entity_start];
301
304
305 /* Init close token */
306 tkz->token->tag_id = tkz->tmp_tag_id;
307 tkz->token->end = data;
309
310 return (data + 1);
311}
#define NULL
Definition gdcache.h:45
struct lexbor_hash lexbor_hash_t
Definition hash.h:41
hash(string $algo, string $data, bool $binary=false, array $options=[])
Definition hash.stub.php:12
struct lxb_html_tokenizer lxb_html_tokenizer_t
Definition base.h:26
@ LXB_HTML_TOKEN_TYPE_CLOSE
Definition token.h:27
lxb_html_tokenizer_error_t * lxb_html_tokenizer_error_add(lexbor_array_obj_t *parse_errors, const lxb_char_t *pos, lxb_html_tokenizer_error_id_t id)
Definition error.c:11
@ LXB_HTML_TOKENIZER_ERROR_UNNUCH
Definition error.h:112
const lxb_char_t * lxb_html_tokenizer_state_before_attribute_name(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:617
const lxb_char_t * lxb_html_tokenizer_state_char_ref(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:1745
const lxb_char_t * lxb_html_tokenizer_state_data_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:204
const lxb_char_t * lxb_html_tokenizer_state_cr(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:1257
const lxb_char_t * lxb_html_tokenizer_state_self_closing_start_tag(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:1275
#define lxb_html_tokenizer_state_append_data_m(tkz, v_data)
Definition state.h:19
#define lxb_html_tokenizer_state_set_text(tkz)
Definition state.h:174
#define lxb_html_tokenizer_state_token_set_begin(tkz, v_begin)
Definition state.h:89
#define lxb_html_tokenizer_state_append_replace_m(tkz)
Definition state.h:37
#define lxb_html_tokenizer_state_token_done_m(tkz, v_end)
Definition state.h:157
#define lxb_html_tokenizer_state_set_tag_m(tkz, _start, _end)
Definition state.h:48
#define lxb_html_tokenizer_state_token_set_end(tkz, v_end)
Definition state.h:98
#define lxb_html_tokenizer_state_token_set_end_oef(tkz)
Definition state.h:108
#define lxb_html_tokenizer_state_begin_set(tkz, v_data)
Definition state.h:16
#define lxb_html_tokenizer_state_append_m(tkz, v_data, size)
Definition state.h:27
unsigned const char * end
Definition php_ffi.h:51
zend_constant * data
const lxb_char_t * lxb_html_tokenizer_state_rcdata_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
const lxb_tag_data_t * lxb_tag_append_lower(lexbor_hash_t *hash, const lxb_char_t *name, size_t length)
Definition tag.c:41
#define LEXBOR_STR_RES_SLIP
Definition str_res.h:14
const lxb_char_t * end
Definition token.h:35
lxb_html_token_type_t type
Definition token.h:49
lxb_tag_id_t tag_id
Definition token.h:48
const lxb_char_t * begin
Definition token.h:34
lxb_html_token_t * token
Definition tokenizer.h:49
uintptr_t entity_start
Definition tokenizer.h:82
lxb_char_t * pos
Definition tokenizer.h:72
lxb_html_tokenizer_state_f state_return
Definition tokenizer.h:36
const lxb_char_t * temp
Definition tokenizer.h:68
lxb_html_tokenizer_state_f state
Definition tokenizer.h:35
lexbor_array_obj_t * parse_errors
Definition tokenizer.h:56
lxb_tag_id_t tmp_tag_id
Definition tokenizer.h:69
lxb_char_t * start
Definition tokenizer.h:71
@ LXB_TAG__TEXT
Definition const.h:26
unsigned char lxb_char_t
Definition types.h:27
zend_string * name