php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
state_rawtext.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
9
10#define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
11#define LEXBOR_STR_RES_ALPHA_CHARACTER
12#include "lexbor/core/str_res.h"
13
14
/*
 * Forward declaration of an external function returning lxb_tag_data_t.
 * NOTE(review): the listing elides the line that carries the function name
 * and its first parameter. The return type and trailing parameters match
 * lxb_tag_append_lower(hash, name, length) in the cross-reference index;
 * confirm against the upstream source.
 */
15const lxb_tag_data_t *
17 const lxb_char_t *name, size_t length);
18
19
/*
 * Prototypes for the file-local (static) RAWTEXT state handlers defined
 * further down in this file. All four share one signature: the tokenizer,
 * the current input position `data`, and the bound `end`; each returns a
 * pointer of type const lxb_char_t *.
 */
20static const lxb_char_t *
21lxb_html_tokenizer_state_rawtext(lxb_html_tokenizer_t *tkz,
22 const lxb_char_t *data,
23 const lxb_char_t *end);
24
25static const lxb_char_t *
26lxb_html_tokenizer_state_rawtext_less_than_sign(lxb_html_tokenizer_t *tkz,
27 const lxb_char_t *data,
28 const lxb_char_t *end);
29
30static const lxb_char_t *
31lxb_html_tokenizer_state_rawtext_end_tag_open(lxb_html_tokenizer_t *tkz,
32 const lxb_char_t *data,
33 const lxb_char_t *end);
34
35static const lxb_char_t *
36lxb_html_tokenizer_state_rawtext_end_tag_name(lxb_html_tokenizer_t *tkz,
37 const lxb_char_t *data,
38 const lxb_char_t *end);
39
40
41/*
42 * Helper function. Not in the specification. For 12.2.5.3 RAWTEXT state
 *
 * Non-static entry that installs the RAWTEXT state handler as tkz->state
 * and returns `data` unchanged, so no input is consumed here.
 *
 * NOTE(review): the listing elides the line with the function name and the
 * `tkz` parameter; presumably this is
 * lxb_html_tokenizer_state_rawtext_before(), which the cross-reference
 * index lists with this return type and these parameters. Confirm upstream.
43 */
44const lxb_char_t *
46 const lxb_char_t *data,
47 const lxb_char_t *end)
48{
49 if (tkz->is_eof == false) {
    /* NOTE(review): the statement inside this branch is elided from the listing. */
51 }
52
    /* Switch the tokenizer's state callback to the RAWTEXT handler. */
53 tkz->state = lxb_html_tokenizer_state_rawtext;
54
    /* The input position is handed back untouched. */
55 return data;
56}
57
58/*
59 * 12.2.5.3 RAWTEXT state
 *
 * Walks the input one byte at a time until `data` reaches `end`, giving
 * special treatment to '<' (0x3C), CR (0x0D) and NUL (0x00); every other
 * byte falls through the default case and is simply stepped over.
 *
 * Returns a pointer into the input: the byte after '<' when a less-than
 * sign is found, `end` on the NUL/EOF path, or the position reached when
 * the loop finishes.
 *
 * NOTE(review): this listing elides several lines (gaps in the rendered
 * line numbers, e.g. 66, 72-73, 82-84, 90, 93, 96, 107, 111, 115-117,
 * 121-126, 136). They are presumably buffer-append / token-emit macro
 * calls; confirm against the upstream source before relying on the
 * comments below.
60 */
61static const lxb_char_t *
62lxb_html_tokenizer_state_rawtext(lxb_html_tokenizer_t *tkz,
63 const lxb_char_t *data,
64 const lxb_char_t *end)
65{
67
68 while (data != end) {
69 switch (*data) {
70 /* U+003C LESS-THAN SIGN (<) */
71 case 0x3C:
74
    /* Continue in the less-than-sign state, starting after the '<'. */
75 tkz->state = lxb_html_tokenizer_state_rawtext_less_than_sign;
76
77 return (data + 1);
78
79 /* U+000D CARRIAGE RETURN (CR) */
80 case 0x0D:
    /*
     * Step past the CR. If it was the last byte available, the byte that
     * follows cannot be inspected yet: remember this handler in
     * tkz->state_return and return.
     * NOTE(review): the elided lines presumably switch tkz->state to a
     * dedicated CR handler (the index lists lxb_html_tokenizer_state_cr).
     */
81 if (++data >= end) {
83
85 tkz->state_return = lxb_html_tokenizer_state_rawtext;
86
87 return data;
88 }
89
    /* Overwrite the byte just before tkz->pos with LF (0x0A). */
91 tkz->pos[-1] = 0x0A;
92
94
    /*
     * When the byte after CR is not LF, step back one position so that
     * the `data++` at the bottom of the loop lands on that byte again
     * and it is examined by the next iteration.
     */
95 if (*data != 0x0A) {
97 data--;
98 }
99
100 break;
101
102 /*
103 * U+0000 NULL
104 * EOF
105 */
106 case 0x00:
108
    /*
     * With tkz->is_eof set, the token is tagged as text and `end` is
     * returned. NOTE(review): the statements guarded by the
     * `begin != NULL` check and the lines before `return end` are elided.
     */
109 if (tkz->is_eof) {
110 if (tkz->token->begin != NULL) {
112 }
113
114 tkz->token->tag_id = LXB_TAG__TEXT;
115
118
119 return end;
120 }
121
    /*
     * NOTE(review): handling of a NUL byte that is not EOF is elided here
     * (listing lines 121-126); only the trailing `break` is visible.
     */
124
127 break;
128
129 default:
130 break;
131 }
132
133 data++;
134 }
135
137
138 return data;
139}
140
141/*
142 * 12.2.5.12 RAWTEXT less-than sign state
 *
 * Inspects the byte at `data`, which is dereferenced without a bounds
 * check against `end` in this function.
 *  - '/' (0x2F): switch to the RAWTEXT end-tag-open state and return the
 *    position after the solidus.
 *  - anything else: switch back to the RAWTEXT state and return `data`
 *    unchanged, so the same byte is seen again by that state.
 * The `end` parameter is not used by this function.
143 */
144static const lxb_char_t *
145lxb_html_tokenizer_state_rawtext_less_than_sign(lxb_html_tokenizer_t *tkz,
146 const lxb_char_t *data,
147 const lxb_char_t *end)
148{
149 /* U+002F SOLIDUS (/) */
150 if (*data == 0x2F) {
151 tkz->state = lxb_html_tokenizer_state_rawtext_end_tag_open;
152
153 return (data + 1);
154 }
155
    /* Not an end tag: fall back to RAWTEXT without consuming the byte. */
156 tkz->state = lxb_html_tokenizer_state_rawtext;
157
158 return data;
159}
160
161/*
162 * 12.2.5.13 RAWTEXT end tag open state
 *
 * Looks up the byte at `data` in lexbor_str_res_alpha_character. If its
 * entry is not LEXBOR_STR_RES_SLIP, the function records where a candidate
 * tag name begins and moves to the end-tag-name state; otherwise it goes
 * back to the RAWTEXT state. In both cases `data` is returned unchanged.
 *
 * NOTE(review): listing line 179 (between the if/else and the return) is
 * elided; presumably a buffer-append macro call. Confirm upstream.
163 */
164static const lxb_char_t *
165lxb_html_tokenizer_state_rawtext_end_tag_open(lxb_html_tokenizer_t *tkz,
166 const lxb_char_t *data,
167 const lxb_char_t *end)
168{
169 if (lexbor_str_res_alpha_character[*data] != LEXBOR_STR_RES_SLIP) {
    /* Remember the input position of the first name byte. */
170 tkz->temp = data;
    /*
     * Store, as an offset from tkz->start, the position one byte before
     * tkz->pos. The end-tag-name state later resets tkz->pos to
     * &tkz->start[tkz->entity_start].
     */
171 tkz->entity_start = (tkz->pos - 1) - tkz->start;
172
173 tkz->state = lxb_html_tokenizer_state_rawtext_end_tag_name;
174 }
175 else {
176 tkz->state = lxb_html_tokenizer_state_rawtext;
177 }
178
180
181 return data;
182}
183
184/*
185 * 12.2.5.14 RAWTEXT end tag name state
 *
 * Scans bytes from `data` up to `end`. On whitespace, '/' or '>' it
 * compares tkz->tmp_tag_id with tkz->token->tag_id:
 *  - different: jump to `anything_else`, which switches back to the
 *    RAWTEXT state and returns `data` unchanged;
 *  - equal: for whitespace and '/', jump to `done`; for '>', run the
 *    inline sequence and return.
 * Both `done` and the '>' sequence re-tag the token as text, reset tkz->pos
 * to &tkz->start[tkz->entity_start], then set the token's tag id to
 * tkz->tmp_tag_id and its begin/end to tkz->temp and `data`, and return
 * (data + 1).
 *
 * NOTE(review): many lines are elided from this listing (gaps in the
 * rendered line numbers, e.g. 192, 208-209, 216, 221-222, 229, 234-235,
 * 242, 248-249, 255, 258, 264, 266, 277, 293-294, 300). These presumably
 * hold the tag lookup, token emission and next-state assignments; the
 * comments below describe only what is visible. Confirm upstream.
186 */
187static const lxb_char_t *
188lxb_html_tokenizer_state_rawtext_end_tag_name(lxb_html_tokenizer_t *tkz,
189 const lxb_char_t *data,
190 const lxb_char_t *end)
191{
193
194 while (data != end) {
195 switch (*data) {
196 /*
197 * U+0009 CHARACTER TABULATION (tab)
198 * U+000A LINE FEED (LF)
199 * U+000C FORM FEED (FF)
200 * U+000D CARRIAGE RETURN (CR)
201 * U+0020 SPACE
202 */
203 case 0x09:
204 case 0x0A:
205 case 0x0C:
206 case 0x0D:
207 case 0x20:
    /* NOTE(review): tail of a call whose first line(s) are elided. */
210 tkz->pos);
211
    /* Only a matching tag id counts as a real end tag. */
212 if (tkz->tmp_tag_id != tkz->token->tag_id) {
213 goto anything_else;
214 }
215
217 goto done;
218
219 /* U+002F SOLIDUS (/) */
220 case 0x2F:
    /* NOTE(review): tail of a call whose first line(s) are elided. */
223 tkz->pos);
224
225 if (tkz->tmp_tag_id != tkz->token->tag_id) {
226 goto anything_else;
227 }
228
230 goto done;
231
232 /* U+003E GREATER-THAN SIGN (>) */
233 case 0x3E:
    /* NOTE(review): tail of a call whose first line(s) are elided. */
236 tkz->pos);
237
238 if (tkz->tmp_tag_id != tkz->token->tag_id) {
239 goto anything_else;
240 }
241
243
244 /* Emit text token */
245 tkz->token->tag_id = LXB_TAG__TEXT;
    /* Move tkz->pos back to the offset saved in tkz->entity_start. */
246 tkz->pos = &tkz->start[tkz->entity_start];
247
250
251 /* Init close token */
252 tkz->token->tag_id = tkz->tmp_tag_id;
253 tkz->token->begin = tkz->temp;
254 tkz->token->end = data;
256
257 /* Emit close token */
259
    /* Resume after the '>'. */
260 return (data + 1);
261
262 default:
    /*
     * NOTE(review): the right-hand side of this comparison (listing line
     * 264) and one statement in the body (266) are elided.
     */
263 if (lexbor_str_res_alpha_character[*data]
265 {
267
268 goto anything_else;
269 }
270
271 break;
272 }
273
274 data++;
275 }
276
278
    /* Input exhausted without reaching a terminator. */
279 return data;
280
281anything_else:
282
    /* Fall back to RAWTEXT; `data` is returned without being advanced. */
283 tkz->state = lxb_html_tokenizer_state_rawtext;
284
285 return data;
286
287done:
288
289 /* Emit text token */
290 tkz->token->tag_id = LXB_TAG__TEXT;
    /* Move tkz->pos back to the offset saved in tkz->entity_start. */
291 tkz->pos = &tkz->start[tkz->entity_start];
292
295
296 /* Init close token */
297 tkz->token->tag_id = tkz->tmp_tag_id;
298 tkz->token->begin = tkz->temp;
299 tkz->token->end = data;
301
    /* Resume after the terminating byte (whitespace or '/'). */
302 return (data + 1);
303}
#define NULL
Definition gdcache.h:45
struct lexbor_hash lexbor_hash_t
Definition hash.h:41
hash(string $algo, string $data, bool $binary=false, array $options=[])
Definition hash.stub.php:12
struct lxb_html_tokenizer lxb_html_tokenizer_t
Definition base.h:26
@ LXB_HTML_TOKEN_TYPE_CLOSE
Definition token.h:27
lxb_html_tokenizer_error_t * lxb_html_tokenizer_error_add(lexbor_array_obj_t *parse_errors, const lxb_char_t *pos, lxb_html_tokenizer_error_id_t id)
Definition error.c:11
@ LXB_HTML_TOKENIZER_ERROR_UNNUCH
Definition error.h:112
const lxb_char_t * lxb_html_tokenizer_state_before_attribute_name(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:617
const lxb_char_t * lxb_html_tokenizer_state_data_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:204
const lxb_char_t * lxb_html_tokenizer_state_cr(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:1257
const lxb_char_t * lxb_html_tokenizer_state_self_closing_start_tag(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:1275
#define lxb_html_tokenizer_state_append_data_m(tkz, v_data)
Definition state.h:19
#define lxb_html_tokenizer_state_set_text(tkz)
Definition state.h:174
#define lxb_html_tokenizer_state_token_set_begin(tkz, v_begin)
Definition state.h:89
#define lxb_html_tokenizer_state_append_replace_m(tkz)
Definition state.h:37
#define lxb_html_tokenizer_state_token_done_m(tkz, v_end)
Definition state.h:157
#define lxb_html_tokenizer_state_set_tag_m(tkz, _start, _end)
Definition state.h:48
#define lxb_html_tokenizer_state_token_set_end(tkz, v_end)
Definition state.h:98
#define lxb_html_tokenizer_state_token_set_end_oef(tkz)
Definition state.h:108
#define lxb_html_tokenizer_state_begin_set(tkz, v_data)
Definition state.h:16
#define lxb_html_tokenizer_state_append_m(tkz, v_data, size)
Definition state.h:27
unsigned const char * end
Definition php_ffi.h:51
zend_constant * data
const lxb_char_t * lxb_html_tokenizer_state_rawtext_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
const lxb_tag_data_t * lxb_tag_append_lower(lexbor_hash_t *hash, const lxb_char_t *name, size_t length)
Definition tag.c:41
#define LEXBOR_STR_RES_SLIP
Definition str_res.h:14
const lxb_char_t * end
Definition token.h:35
lxb_html_token_type_t type
Definition token.h:49
lxb_tag_id_t tag_id
Definition token.h:48
const lxb_char_t * begin
Definition token.h:34
lxb_html_token_t * token
Definition tokenizer.h:49
uintptr_t entity_start
Definition tokenizer.h:82
lxb_char_t * pos
Definition tokenizer.h:72
lxb_html_tokenizer_state_f state_return
Definition tokenizer.h:36
const lxb_char_t * temp
Definition tokenizer.h:68
lxb_html_tokenizer_state_f state
Definition tokenizer.h:35
lexbor_array_obj_t * parse_errors
Definition tokenizer.h:56
lxb_tag_id_t tmp_tag_id
Definition tokenizer.h:69
lxb_char_t * start
Definition tokenizer.h:71
@ LXB_TAG__TEXT
Definition const.h:26
unsigned char lxb_char_t
Definition types.h:27
zend_string * name