php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
state_comment.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
9
10#define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
11#include "lexbor/core/str_res.h"
12
13
/*
 * Forward declarations of the file-local (static) comment state handlers.
 * Each handler takes the tokenizer plus the [data, end) input range and
 * returns a pointer into that range. They are declared up front because
 * the definitions below assign one another to tkz->state.
 *
 * NOTE(review): in this listing the prototypes for
 * ..._comment_less_than_sign_bang_dash and
 * ..._comment_less_than_sign_bang_dash_dash are missing their first
 * parameter line (embedded line numbers 41 and 47 are skipped) --
 * confirm against the original file.
 */
14static const lxb_char_t *
15lxb_html_tokenizer_state_comment_start(lxb_html_tokenizer_t *tkz,
16 const lxb_char_t *data,
17 const lxb_char_t *end);
18
19static const lxb_char_t *
20lxb_html_tokenizer_state_comment_start_dash(lxb_html_tokenizer_t *tkz,
21 const lxb_char_t *data,
22 const lxb_char_t *end);
23
24static const lxb_char_t *
25lxb_html_tokenizer_state_comment(lxb_html_tokenizer_t *tkz,
26 const lxb_char_t *data,
27 const lxb_char_t *end);
28
29static const lxb_char_t *
30lxb_html_tokenizer_state_comment_less_than_sign(lxb_html_tokenizer_t *tkz,
31 const lxb_char_t *data,
32 const lxb_char_t *end);
33
34static const lxb_char_t *
35lxb_html_tokenizer_state_comment_less_than_sign_bang(lxb_html_tokenizer_t *tkz,
36 const lxb_char_t *data,
37 const lxb_char_t *end);
38
39static const lxb_char_t *
40lxb_html_tokenizer_state_comment_less_than_sign_bang_dash(
42 const lxb_char_t *data,
43 const lxb_char_t *end);
44
45static const lxb_char_t *
46lxb_html_tokenizer_state_comment_less_than_sign_bang_dash_dash(
48 const lxb_char_t *data,
49 const lxb_char_t *end);
50
51static const lxb_char_t *
52lxb_html_tokenizer_state_comment_end_dash(lxb_html_tokenizer_t *tkz,
53 const lxb_char_t *data,
54 const lxb_char_t *end);
55
56static const lxb_char_t *
57lxb_html_tokenizer_state_comment_end(lxb_html_tokenizer_t *tkz,
58 const lxb_char_t *data,
59 const lxb_char_t *end);
60
61static const lxb_char_t *
62lxb_html_tokenizer_state_comment_end_bang(lxb_html_tokenizer_t *tkz,
63 const lxb_char_t *data,
64 const lxb_char_t *end);
65
66
67/*
68 * Helper function. Not in the specification. For 12.2.5.43
 *
 * Non-static entry point that runs before the comment start state.
 * It does some work only while tkz->is_eof is false, then hands the same
 * (tkz, data, end) arguments to the comment start state and returns
 * whatever that handler returns.
 *
 * NOTE(review): the line carrying the function name and the tkz parameter
 * (embedded line 71) is missing from this listing, as are the statements
 * at embedded lines 76-77 and 80 -- confirm against the original file.
69 */
70const lxb_char_t *
72 const lxb_char_t *data,
73 const lxb_char_t *end)
74{
75 if (tkz->is_eof == false) {
/* NOTE(review): body of this branch is not visible in this listing. */
78 }
79
81
/* Delegate directly instead of waiting for another dispatch round. */
82 return lxb_html_tokenizer_state_comment_start(tkz, data, end);
83}
84
85/*
86 * 12.2.5.43 Comment start state
 *
 * Inspects the byte at data:
 *   '-' (0x2D): consumes it and switches to the comment start dash state.
 *   '>' (0x3E): consumes it; the rest of this branch (embedded lines
 *               100-107) is not visible in this listing.
 *   otherwise : switches to the comment state without consuming the byte,
 *               so the next handler sees the same byte again.
 *
 * Returns the position from which the next handler should continue.
87 */
88static const lxb_char_t *
89lxb_html_tokenizer_state_comment_start(lxb_html_tokenizer_t *tkz,
90 const lxb_char_t *data,
91 const lxb_char_t *end)
92{
93 /* U+002D HYPHEN-MINUS (-) */
94 if (*data == 0x2D) {
95 data++;
96 tkz->state = lxb_html_tokenizer_state_comment_start_dash;
97 }
98 /* U+003E GREATER-THAN SIGN (>) */
99 else if (*data == 0x3E) {
/* NOTE(review): statements preceding the increment are missing here. */
101
104
107
108 data++;
109 }
110 else {
/* Byte is left unconsumed for the comment state. */
111 tkz->state = lxb_html_tokenizer_state_comment;
112 }
113
114 return data;
115}
116
117/*
118 * 12.2.5.44 Comment start dash state
 *
 * Inspects the byte at data:
 *   '-' (0x2D): switches to the comment end state and consumes the byte.
 *   '>' (0x3E): consumes the byte; the remaining statements of the branch
 *               are not visible in this listing.
 *   0x00 while tkz->is_eof is set: returns end, i.e. reports the whole
 *               input as consumed (other statements of the branch are not
 *               visible in this listing).
 *   otherwise (including 0x00 when is_eof is not set): switches to the
 *               comment state and returns data unchanged so the byte is
 *               examined again there.
119 */
120static const lxb_char_t *
121lxb_html_tokenizer_state_comment_start_dash(lxb_html_tokenizer_t *tkz,
122 const lxb_char_t *data,
123 const lxb_char_t *end)
124{
125 /* U+002D HYPHEN-MINUS (-) */
126 if (*data == 0x2D) {
127 tkz->state = lxb_html_tokenizer_state_comment_end;
128
129 return (data + 1);
130 }
131 /* U+003E GREATER-THAN SIGN (>) */
132 else if (*data == 0x3E) {
/* NOTE(review): statements preceding the return are missing here. */
134
137
140
141 return (data + 1);
142 }
143 /* EOF */
144 else if (*data == 0x00) {
/* A zero byte only counts as end of input when is_eof is set. */
145 if (tkz->is_eof) {
147
150
153
154 return end;
155 }
156 }
157
/* NOTE(review): embedded line 158 is missing from this listing. */
159
160 tkz->state = lxb_html_tokenizer_state_comment;
161
162 return data;
163}
164
165/*
166 * 12.2.5.45 Comment state
 *
 * Scans forward from data until end, stopping early on bytes that need
 * another state:
 *   '<' (0x3C): consumes it, switches to the comment less-than sign state
 *               and returns.
 *   '-' (0x2D): switches to the comment end dash state and returns the
 *               position after the dash.
 *   CR  (0x0D): if CR is the last byte of the chunk, records this handler
 *               in tkz->state_return and returns; otherwise writes 0x0A
 *               to tkz->pos[-1] and skips a directly following LF.
 *   0x00      : with tkz->is_eof set, returns end; otherwise scanning
 *               continues with the next byte.
 *   any other : skipped.
 *
 * Returns data (equal to end) when the loop runs to completion.
 *
 * NOTE(review): many statements are missing from this listing (gaps in the
 * embedded line numbers), so the description covers only what is visible.
167 */
168static const lxb_char_t *
169lxb_html_tokenizer_state_comment(lxb_html_tokenizer_t *tkz,
170 const lxb_char_t *data,
171 const lxb_char_t *end)
172{
174
175 while (data != end) {
176 switch (*data) {
177 /* U+003C LESS-THAN SIGN (<) */
178 case 0x3C:
179 data++;
180
182
183 tkz->state = lxb_html_tokenizer_state_comment_less_than_sign;
184
185 return data;
186
187 /* U+002D HYPHEN-MINUS (-) */
188 case 0x2D:
191
192 tkz->state = lxb_html_tokenizer_state_comment_end_dash;
193
194 return (data + 1);
195
196 /* U+000D CARRIAGE RETURN (CR) */
197 case 0x0D:
/* CR at the very end of the chunk: the following byte is not available yet. */
198 if (++data >= end) {
200
202 tkz->state_return = lxb_html_tokenizer_state_comment;
203
204 return data;
205 }
206
/* Presumably replaces the stored CR with LF -- TODO confirm what pos[-1] holds. */
208 tkz->pos[-1] = 0x0A;
209
211
/*
 * data now points at the byte after CR. If it is not LF, step back so
 * the data++ after the switch lands on that byte again; if it is LF,
 * the data++ moves past it.
 */
212 if (*data != 0x0A) {
214 data--;
215 }
216
217 break;
218
219 /*
220 * EOF
221 * U+0000 NULL
222 */
223 case 0x00:
225
/* A zero byte only counts as end of input when is_eof is set. */
226 if (tkz->is_eof) {
227 if (tkz->token->begin != NULL) {
229 }
230
233
236
237 return end;
238 }
239
242
245 break;
246
247 default:
248 break;
249 }
250
251 data++;
252 }
253
255
256 return data;
257}
258
259/*
260 * 12.2.5.46 Comment less-than sign state
 *
 * Inspects the byte at data:
 *   '!' (0x21): switches to the comment less-than sign bang state and
 *               consumes the byte.
 *   '<' (0x3C): consumes the byte; no state change is visible, so this
 *               handler runs again for the next byte.
 *   otherwise : switches back to the comment state and returns data
 *               unchanged so the byte is examined again there.
 *
 * NOTE(review): embedded lines 269 and 277 are missing from this listing.
261 */
262static const lxb_char_t *
263lxb_html_tokenizer_state_comment_less_than_sign(lxb_html_tokenizer_t *tkz,
264 const lxb_char_t *data,
265 const lxb_char_t *end)
266{
267 /* U+0021 EXCLAMATION MARK (!) */
268 if (*data == 0x21) {
270
271 tkz->state = lxb_html_tokenizer_state_comment_less_than_sign_bang;
272
273 return (data + 1);
274 }
275 /* U+003C LESS-THAN SIGN (<) */
276 else if (*data == 0x3C) {
278
279 return (data + 1);
280 }
281
282 tkz->state = lxb_html_tokenizer_state_comment;
283
284 return data;
285}
286
287/*
288 * 12.2.5.47 Comment less-than sign bang state
 *
 * Inspects the byte at data:
 *   '-' (0x2D): switches to the comment less-than sign bang dash state and
 *               consumes the byte.
 *   otherwise : switches back to the comment state and returns data
 *               unchanged so the byte is examined again there.
 *
 * The end parameter is not used by this handler.
289 */
290static const lxb_char_t *
291lxb_html_tokenizer_state_comment_less_than_sign_bang(lxb_html_tokenizer_t *tkz,
292 const lxb_char_t *data,
293 const lxb_char_t *end)
294{
295 /* U+002D HYPHEN-MINUS (-) */
296 if (*data == 0x2D) {
297 tkz->state = lxb_html_tokenizer_state_comment_less_than_sign_bang_dash;
298
299 return (data + 1);
300 }
301
302 tkz->state = lxb_html_tokenizer_state_comment;
303
304 return data;
305}
306
307/*
308 * 12.2.5.48 Comment less-than sign bang dash state
 *
 * Inspects the byte at data:
 *   '-' (0x2D): switches to the comment less-than sign bang dash dash
 *               state and consumes the byte.
 *   otherwise : switches to the comment end dash state and returns data
 *               unchanged so the byte is examined again there.
 *
 * NOTE(review): the first parameter line (embedded line 312) is missing
 * from this listing -- confirm against the original file.
309 */
310static const lxb_char_t *
311lxb_html_tokenizer_state_comment_less_than_sign_bang_dash(
313 const lxb_char_t *data,
314 const lxb_char_t *end)
315{
316 /* U+002D HYPHEN-MINUS (-) */
317 if (*data == 0x2D) {
318 tkz->state =
319 lxb_html_tokenizer_state_comment_less_than_sign_bang_dash_dash;
320
321 return (data + 1);
322 }
323
324 tkz->state = lxb_html_tokenizer_state_comment_end_dash;
325
326 return data;
327}
328
329/*
330 * 12.2.5.49 Comment less-than sign bang dash dash state
 *
 * Every visible path switches to the comment end state and returns data
 * unchanged, so the current byte is examined again by that handler:
 *   '>' (0x3E)                      : early return.
 *   0x00 while tkz->is_eof is set   : early return.
 *   otherwise                       : falls through to the final
 *                                     assignment.
 *
 * NOTE(review): the first parameter line (embedded line 334) and the
 * statements at embedded lines 353-354, which presumably distinguish the
 * fall-through path from the early returns, are missing from this
 * listing -- confirm against the original file.
331 */
332static const lxb_char_t *
333lxb_html_tokenizer_state_comment_less_than_sign_bang_dash_dash(
335 const lxb_char_t *data,
336 const lxb_char_t *end)
337{
338 /* U+003E GREATER-THAN SIGN (>) */
339 if (*data == 0x3E) {
340 tkz->state = lxb_html_tokenizer_state_comment_end;
341
342 return data;
343 }
344 /* EOF */
345 else if (*data == 0x00) {
/* A zero byte only counts as end of input when is_eof is set. */
346 if (tkz->is_eof) {
347 tkz->state = lxb_html_tokenizer_state_comment_end;
348
349 return data;
350 }
351 }
352
355
356 tkz->state = lxb_html_tokenizer_state_comment_end;
357
358 return data;
359}
360
361/*
362 * 12.2.5.50 Comment end dash state
 *
 * Inspects the byte at data:
 *   '-' (0x2D): switches to the comment end state and consumes the byte.
 *   0x00 while tkz->is_eof is set: returns end, i.e. reports the whole
 *               input as consumed (other statements of the branch are not
 *               visible in this listing).
 *   otherwise (including 0x00 when is_eof is not set): switches back to
 *               the comment state and returns data unchanged so the byte
 *               is examined again there.
 *
 * NOTE(review): embedded lines 378-382 and 388 are missing from this
 * listing.
363 */
364static const lxb_char_t *
365lxb_html_tokenizer_state_comment_end_dash(lxb_html_tokenizer_t *tkz,
366 const lxb_char_t *data,
367 const lxb_char_t *end)
368{
369 /* U+002D HYPHEN-MINUS (-) */
370 if (*data == 0x2D) {
371 tkz->state = lxb_html_tokenizer_state_comment_end;
372
373 return (data + 1);
374 }
375 /* EOF */
376 else if (*data == 0x00) {
/* A zero byte only counts as end of input when is_eof is set. */
377 if (tkz->is_eof) {
380
383
384 return end;
385 }
386 }
387
389
390 tkz->state = lxb_html_tokenizer_state_comment;
391
392 return data;
393}
394
395/*
396 * 12.2.5.51 Comment end state
 *
 * Inspects the byte at data:
 *   '>' (0x3E): consumes the byte; the remaining statements of the branch
 *               are not visible in this listing.
 *   '!' (0x21): switches to the comment end bang state and consumes the
 *               byte.
 *   '-' (0x2D): consumes the byte; no state change is visible, so this
 *               handler runs again for the next byte.
 *   0x00 while tkz->is_eof is set: returns end, i.e. reports the whole
 *               input as consumed.
 *   otherwise (including 0x00 when is_eof is not set): switches back to
 *               the comment state and returns data unchanged so the byte
 *               is examined again there.
 *
 * NOTE(review): several statements are missing from this listing (gaps in
 * the embedded line numbers) -- confirm against the original file.
397 */
398static const lxb_char_t *
399lxb_html_tokenizer_state_comment_end(lxb_html_tokenizer_t *tkz,
400 const lxb_char_t *data,
401 const lxb_char_t *end)
402{
403 /* U+003E GREATER-THAN SIGN (>) */
404 if (*data == 0x3E) {
405 /* Skip two '-' characters in comment tag end "-->"
406 * For <!----> or <!-----> ...
407 */
409
412
413 return (data + 1);
414 }
415 /* U+0021 EXCLAMATION MARK (!) */
416 else if (*data == 0x21) {
417 tkz->state = lxb_html_tokenizer_state_comment_end_bang;
418
419 return (data + 1);
420 }
421 /* U+002D HYPHEN-MINUS (-) */
422 else if (*data == 0x2D) {
424
425 return (data + 1);
426 }
427 /* EOF */
428 else if (*data == 0x00) {
/* A zero byte only counts as end of input when is_eof is set. */
429 if (tkz->is_eof) {
432
435
436 return end;
437 }
438 }
439
441
442 tkz->state = lxb_html_tokenizer_state_comment;
443
444 return data;
445}
446
447/*
448 * 12.2.5.52 Comment end bang state
 *
 * Inspects the byte at data:
 *   '-' (0x2D): switches to the comment end dash state and consumes the
 *               byte.
 *   '>' (0x3E): consumes the byte; the remaining statements of the branch
 *               are not visible in this listing.
 *   0x00 while tkz->is_eof is set: returns end, i.e. reports the whole
 *               input as consumed.
 *   otherwise (including 0x00 when is_eof is not set): switches back to
 *               the comment state and returns data unchanged so the byte
 *               is examined again there.
 *
 * NOTE(review): embedded lines 463-469 and 476-480 are missing from this
 * listing.
449 */
450static const lxb_char_t *
451lxb_html_tokenizer_state_comment_end_bang(lxb_html_tokenizer_t *tkz,
452 const lxb_char_t *data,
453 const lxb_char_t *end)
454{
455 /* U+002D HYPHEN-MINUS (-) */
456 if (*data == 0x2D) {
457 tkz->state = lxb_html_tokenizer_state_comment_end_dash;
458
459 return (data + 1);
460 }
461 /* U+003E GREATER-THAN SIGN (>) */
462 else if (*data == 0x3E) {
464
467
470
471 return (data + 1);
472 }
473 /* EOF */
474 else if (*data == 0x00) {
/* A zero byte only counts as end of input when is_eof is set. */
475 if (tkz->is_eof) {
478
481
482 return end;
483 }
484 }
485
486 tkz->state = lxb_html_tokenizer_state_comment;
487
488 return data;
489}
#define NULL
Definition gdcache.h:45
struct lxb_html_tokenizer lxb_html_tokenizer_t
Definition base.h:26
lxb_html_tokenizer_error_t * lxb_html_tokenizer_error_add(lexbor_array_obj_t *parse_errors, const lxb_char_t *pos, lxb_html_tokenizer_error_id_t id)
Definition error.c:11
@ LXB_HTML_TOKENIZER_ERROR_INCLCO
Definition error.h:56
@ LXB_HTML_TOKENIZER_ERROR_NECO
Definition error.h:90
@ LXB_HTML_TOKENIZER_ERROR_ABCLOFEMCO
Definition error.h:22
@ LXB_HTML_TOKENIZER_ERROR_UNNUCH
Definition error.h:112
@ LXB_HTML_TOKENIZER_ERROR_EOINCO
Definition error.h:48
const lxb_char_t * lxb_html_tokenizer_state_data_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:204
const lxb_char_t * lxb_html_tokenizer_state_cr(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
Definition state.c:1257
#define lxb_html_tokenizer_state_append_data_m(tkz, v_data)
Definition state.h:19
#define lxb_html_tokenizer_state_set_text(tkz)
Definition state.h:174
#define lxb_html_tokenizer_state_token_set_begin(tkz, v_begin)
Definition state.h:89
#define lxb_html_tokenizer_state_append_replace_m(tkz)
Definition state.h:37
#define lxb_html_tokenizer_state_token_done_m(tkz, v_end)
Definition state.h:157
#define lxb_html_tokenizer_state_token_set_end(tkz, v_end)
Definition state.h:98
#define lxb_html_tokenizer_state_token_set_end_oef(tkz)
Definition state.h:108
#define lxb_html_tokenizer_state_token_done_wo_check_m(tkz, v_end)
Definition state.h:167
#define lxb_html_tokenizer_state_begin_set(tkz, v_data)
Definition state.h:16
#define lxb_html_tokenizer_state_append_m(tkz, v_data, size)
Definition state.h:27
unsigned const char * end
Definition php_ffi.h:51
zend_constant * data
const lxb_char_t * lxb_html_tokenizer_state_comment_before_start(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
const lxb_char_t * end
Definition token.h:35
lxb_tag_id_t tag_id
Definition token.h:48
const lxb_char_t * begin
Definition token.h:34
lxb_html_token_t * token
Definition tokenizer.h:49
lxb_char_t * pos
Definition tokenizer.h:72
lxb_html_tokenizer_state_f state_return
Definition tokenizer.h:36
lxb_html_tokenizer_state_f state
Definition tokenizer.h:35
lexbor_array_obj_t * parse_errors
Definition tokenizer.h:56
const lxb_char_t * last
Definition tokenizer.h:75
@ LXB_TAG__EM_COMMENT
Definition const.h:28
unsigned char lxb_char_t
Definition types.h:27