php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
decode.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2019 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
11
12
/*
 * Consume one UTF-8 continuation byte that must lie in [_lower, _upper].
 *
 * Relies on `ch`, `p`, `need`, `data` and `ctx` being in scope where the
 * macro is expanded.
 *
 * Byte out of range: it is left unread, the saved utf_8 state (lower, need)
 * is cleared, the replacement sequence is emitted through
 * LXB_ENCODING_DECODE_ERROR_BEGIN/END (which may return from the enclosing
 * function; only when the replacement does not fit are *data and
 * ctx->have_error recorded first) and then `_cont` is executed.
 * Byte in range: it is consumed, `need` is decremented and its low six bits
 * are shifted into ctx->codepoint.
 *
 * The body is a bare block rather than `do { } while (0)`: a `continue`
 * passed as `_cont` would otherwise bind to the wrapper loop.
 * The range arguments are parenthesized so that any expression may be passed.
 */
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY(_lower, _upper, _cont)              \
    {                                                                          \
        ch = *p;                                                               \
                                                                               \
        if (ch < (_lower) || ch > (_upper)) {                                  \
            ctx->u.utf_8.lower = 0x00;                                         \
            ctx->u.utf_8.need = 0;                                             \
                                                                               \
            LXB_ENCODING_DECODE_ERROR_BEGIN {                                  \
                *data = p;                                                     \
                ctx->have_error = true;                                        \
            }                                                                  \
            LXB_ENCODING_DECODE_ERROR_END();                                   \
                                                                               \
            _cont;                                                             \
        }                                                                      \
        else {                                                                 \
            p++;                                                               \
            need--;                                                            \
            ctx->codepoint = (ctx->codepoint << 6) | (ch & 0x3F);              \
        }                                                                      \
    }
35
/*
 * Store in ctx->u.utf_8 the restricted range that the next continuation
 * byte must satisfy, chosen from the lead byte held in `ch`.
 *
 *   ch == first : range is [f_lower, 0xBF]
 *   ch == two   : range is [0x80, s_upper]
 *   otherwise   : ctx->u.utf_8.lower / upper are left untouched
 *
 * Relies on `ch` and `ctx` being in scope where the macro is expanded.
 * All arguments are parenthesized so that any expression may be passed.
 */
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET(first, two, f_lower, s_upper)   \
    do {                                                                       \
        if (ch == (first)) {                                                   \
            ctx->u.utf_8.lower = (f_lower);                                    \
            ctx->u.utf_8.upper = 0xBF;                                         \
        }                                                                      \
        else if (ch == (two)) {                                                \
            ctx->u.utf_8.lower = 0x80;                                         \
            ctx->u.utf_8.upper = (s_upper);                                    \
        }                                                                      \
    }                                                                          \
    while (0)
48
/*
 * Store `cp` in the next free slot of (ctx)->buffer_out and advance
 * (ctx)->buffer_used. No bounds check is performed here.
 */
#define LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, cp)                           \
    ((void) ((ctx)->buffer_out[(ctx)->buffer_used++] = (cp)))
54
/*
 * Append `cp` to (ctx)->buffer_out when a free slot exists; otherwise
 * return LXB_STATUS_SMALL_BUFFER from the enclosing function.
 */
#define LXB_ENCODING_DECODE_APPEND(ctx, cp)                                    \
    do {                                                                       \
        if ((ctx)->buffer_used < (ctx)->buffer_length) {                       \
            (ctx)->buffer_out[(ctx)->buffer_used++] = (cp);                    \
        }                                                                      \
        else {                                                                 \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
    }                                                                          \
    while (0)
64
/*
 * Append `cp` to (ctx)->buffer_out when a free slot exists. Otherwise
 * publish the local read cursor `p` through `*data` and return
 * LXB_STATUS_SMALL_BUFFER from the enclosing function.
 *
 * Relies on `p` and `data` being in scope where the macro is expanded.
 */
#define LXB_ENCODING_DECODE_APPEND_P(ctx, cp)                                  \
    do {                                                                       \
        if ((ctx)->buffer_used < (ctx)->buffer_length) {                       \
            (ctx)->buffer_out[(ctx)->buffer_used++] = (cp);                    \
        }                                                                      \
        else {                                                                 \
            *data = p;                                                         \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
    }                                                                          \
    while (0)
75
/*
 * Return LXB_STATUS_SMALL_BUFFER from the enclosing function unless
 * (ctx)->buffer_out still has at least one free slot.
 */
#define LXB_ENCODING_DECODE_CHECK_OUT(ctx)                                     \
    do {                                                                       \
        if (!((ctx)->buffer_used < (ctx)->buffer_length)) {                    \
            return LXB_STATUS_SMALL_BUFFER;                                    \
        }                                                                      \
    }                                                                          \
    while (0)
83
/*
 * Opening half of the decode-error handler. It is intentionally left
 * unbalanced: it must be followed by a `{ ... }` block and then by
 * LXB_ENCODING_DECODE_ERROR_END().
 *
 * Uses the `ctx` visible at the expansion site. Returns LXB_STATUS_ERROR
 * from the enclosing function when ctx->replace_to is NULL. Otherwise it
 * opens the branch taken when the replacement sequence does not fit into the
 * remaining output buffer; the caller's block runs only in that branch.
 */
84#define LXB_ENCODING_DECODE_ERROR_BEGIN \
85 do { \
86 if (ctx->replace_to == NULL) { \
87 return LXB_STATUS_ERROR; \
88 } \
89 \
90 if ((ctx->buffer_used + ctx->replace_len) > ctx->buffer_length) { \
91 do
92
/*
 * Closing half of the decode-error handler opened by
 * LXB_ENCODING_DECODE_ERROR_BEGIN.
 *
 * Terminates the caller's block and returns LXB_STATUS_SMALL_BUFFER from the
 * enclosing function in the "does not fit" branch. When there is room, it
 * copies ctx->replace_len code points from ctx->replace_to to the end of
 * ctx->buffer_out and advances ctx->buffer_used by the same amount.
 */
93#define LXB_ENCODING_DECODE_ERROR_END() \
94 while (0); \
95 \
96 return LXB_STATUS_SMALL_BUFFER; \
97 } \
98 \
99 memcpy(&ctx->buffer_out[ctx->buffer_used], ctx->replace_to, \
100 sizeof(lxb_codepoint_t) * ctx->replace_len); \
101 \
102 ctx->buffer_used += ctx->replace_len; \
103 } \
104 while (0)
105
/*
 * Emit the replacement sequence with no extra bookkeeping: the block that
 * runs before the LXB_STATUS_SMALL_BUFFER return is empty.
 *
 * NOTE: the `ctx` parameter does not appear in the replacement list, so it
 * is never substituted; the BEGIN/END pair always operates on the
 * identifier `ctx` visible at the expansion site.
 */
#define LXB_ENCODING_DECODE_ERROR(ctx)                                         \
    LXB_ENCODING_DECODE_ERROR_BEGIN {                                          \
        /* nothing to record before bailing out */                             \
    }                                                                          \
    LXB_ENCODING_DECODE_ERROR_END()
112
/*
 * Report a malformed sequence whose last byte read is `byte`.
 *
 * A `byte` below 0x80 is handed back to the input by stepping `*data` one
 * position backwards. The replacement sequence is then emitted; when it does
 * not fit, ctx->have_error is raised and `ident` is set to 0x01 before
 * LXB_STATUS_SMALL_BUFFER is returned from the enclosing function.
 *
 * Relies on `byte`, `data` and `ctx` being in scope where the macro is
 * expanded.
 */
#define LXB_ENCODING_DECODE_FAILED(ident)                                      \
    do {                                                                       \
        if ((byte) <= 0x7F) {                                                  \
            --(*data);                                                         \
        }                                                                      \
                                                                               \
        LXB_ENCODING_DECODE_ERROR_BEGIN                                        \
        {                                                                      \
            ctx->have_error = true;                                            \
            (ident) = 0x01;                                                    \
        }                                                                      \
        LXB_ENCODING_DECODE_ERROR_END();                                       \
    }                                                                          \
    while (0)
126
/*
 * Body of a single-byte decoder: converts [*data, end) to code points.
 *
 * Bytes below 0x80 are emitted unchanged; other bytes are looked up in
 * `decode_map` at index (byte - 0x80) through its `.codepoint` member.
 * A lookup yielding LXB_ENCODING_ERROR_CODEPOINT goes through the
 * LXB_ENCODING_DECODE_ERROR_BEGIN/END replacement path.
 *
 * A byte is consumed only after its output has been stored. Consequently,
 * whenever the enclosing function returns early (LXB_STATUS_SMALL_BUFFER or
 * LXB_STATUS_ERROR) `*data` points at the byte that still has to be decoded,
 * and after every fully handled byte, including a replaced one, `*data` is
 * brought up to date.
 *
 * Relies on `ctx`, `data` and `end` being in scope where the macro is
 * expanded.
 */
#define LXB_ENCODING_DECODE_SINGLE(decode_map)                                 \
    do {                                                                       \
        const lxb_char_t *p = *data;                                           \
                                                                               \
        while (p < end) {                                                      \
            if (*p < 0x80) {                                                   \
                LXB_ENCODING_DECODE_APPEND_P(ctx, *p++);                       \
            }                                                                  \
            else {                                                             \
                /* Peek only: `p` must still address this byte if we bail. */  \
                ctx->codepoint = (decode_map)[*p - 0x80].codepoint;            \
                                                                               \
                if (ctx->codepoint == LXB_ENCODING_ERROR_CODEPOINT) {          \
                    LXB_ENCODING_DECODE_ERROR_BEGIN {                          \
                        *data = p;                                             \
                    }                                                          \
                    LXB_ENCODING_DECODE_ERROR_END();                           \
                }                                                              \
                else {                                                         \
                    LXB_ENCODING_DECODE_APPEND_P(ctx, ctx->codepoint);         \
                }                                                              \
                                                                               \
                p++;                                                           \
            }                                                                  \
                                                                               \
            *data = p;                                                         \
        }                                                                      \
    }                                                                          \
    while (0)
152
/*
 * Single-code-point variant: consume the continuation byte at **data, which
 * must lie in [lower, upper].
 *
 * Out of range: jumps to the `failed` label of the enclosing function with
 * the byte left unread. In range: advances *data, decrements `needed` and
 * shifts the low six bits of the byte into ctx->codepoint.
 *
 * Relies on `ch`, `data`, `needed`, `ctx` and a `failed:` label being in
 * scope where the macro is expanded. The range arguments are parenthesized
 * so that any expression may be passed.
 */
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SINGLE(lower, upper)                \
    do {                                                                       \
        ch = **data;                                                           \
                                                                               \
        if (ch < (lower) || ch > (upper)) {                                    \
            goto failed;                                                       \
        }                                                                      \
                                                                               \
        (*data)++;                                                             \
        needed--;                                                              \
        ctx->codepoint = (ctx->codepoint << 6) | (ch & 0x3F);                  \
    }                                                                          \
    while (0)
166
/*
 * Single-code-point variant of the range selection: store in ctx->u.utf_8
 * the restricted range that the next continuation byte must satisfy, chosen
 * from the lead byte held in `ch`.
 *
 *   ch == first : range is [f_lower, 0xBF]
 *   ch == two   : range is [0x80, s_upper]
 *   otherwise   : ctx->u.utf_8.lower / upper are left untouched
 *
 * Relies on `ch` and `ctx` being in scope where the macro is expanded.
 * All arguments are parenthesized so that any expression may be passed.
 */
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET_SINGLE(first, two, f_lower,     \
                                                      s_upper)                 \
    do {                                                                       \
        if (ch == (first)) {                                                   \
            ctx->u.utf_8.lower = (f_lower);                                    \
            ctx->u.utf_8.upper = 0xBF;                                         \
        }                                                                      \
        else if (ch == (two)) {                                                \
            ctx->u.utf_8.lower = 0x80;                                         \
            ctx->u.utf_8.upper = (s_upper);                                    \
        }                                                                      \
    }                                                                          \
    while (0)
180
181
188
191 const lxb_char_t **data, const lxb_char_t *end)
192{
193 *data = end;
194 return LXB_STATUS_ERROR;
195}
196
204
207 const lxb_char_t **data, const lxb_char_t *end)
208{
209 uint32_t index;
210 lxb_char_t lead, byte;
211
212 ctx->status = LXB_STATUS_OK;
213
214 if (ctx->u.lead != 0x00) {
215 if (ctx->have_error) {
216 ctx->u.lead = 0x00;
217 ctx->have_error = false;
218
220 ctx->u.lead = 0x01;
221 ctx->have_error = true;
223 }
224 else if (ctx->second_codepoint != 0x0000) {
225 if ((ctx->buffer_used + 2) > ctx->buffer_length) {
227 }
228
231
232 ctx->u.lead = 0x00;
233 ctx->second_codepoint = 0x0000;
234 }
235 else {
236 if (*data >= end) {
238
239 return LXB_STATUS_CONTINUE;
240 }
241
243
244 lead = (lxb_char_t) ctx->u.lead;
245 ctx->u.lead = 0x00;
246
247 goto lead_state;
248 }
249 }
250
251 while (*data < end) {
253
254 lead = *(*data)++;
255
256 if (lead < 0x80) {
258 continue;
259 }
260
261 if ((unsigned) (lead - 0x81) > (0xFE - 0x81)) {
263 (*data)--;
264 }
266
267 continue;
268 }
269
270 if (*data >= end) {
271 ctx->u.lead = lead;
273
274 return LXB_STATUS_CONTINUE;
275 }
276
277 lead_state:
278
279 index = 0;
280 byte = *(*data)++;
281
282 if ((unsigned) (byte - 0x40) <= (0x7E - 0x40)
283 || (unsigned) (byte - 0xA1) <= (0xFE - 0xA1))
284 {
285 if (byte < 0x7F) {
286 /* Max index == (0xFE - 0x81) * 157 + (0x7E - 0x40) == 19687 */
287 index = (lead - 0x81) * 157 + (byte - 0x40);
288 }
289 else {
290 /* Max index == (0xFE - 0x81) * 157 + (0xFE - 0x62) == 19781 */
291 index = (lead - 0x81) * 157 + (byte - 0x62);
292 }
293 }
294
295 /*
296 * 1133 U+00CA U+0304 Ê̄ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON)
297 * 1135 U+00CA U+030C Ê̌ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON)
298 * 1164 U+00EA U+0304 ê̄ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON)
299 * 1166 U+00EA U+030C ê̌ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON)
300 */
301 switch (index) {
302 case 1133:
303 if ((ctx->buffer_used + 2) > ctx->buffer_length) {
304 ctx->u.lead = 0x00CA;
305 ctx->second_codepoint = 0x0304;
306
308 }
309
312
313 continue;
314
315 case 1135:
316 if ((ctx->buffer_used + 2) > ctx->buffer_length) {
317 ctx->u.lead = 0x00CA;
318 ctx->second_codepoint = 0x030C;
319
321 }
322
325
326 continue;
327
328 case 1164:
329 if ((ctx->buffer_used + 2) > ctx->buffer_length) {
330 ctx->u.lead = 0x00EA;
331 ctx->second_codepoint = 0x0304;
332
334 }
335
338
339 continue;
340
341 case 1166:
342 if ((ctx->buffer_used + 2) > ctx->buffer_length) {
343 ctx->u.lead = 0x00EA;
344 ctx->second_codepoint = 0x030C;
345
347 }
348
351
352 continue;
353
354 case 0:
356 continue;
357 }
358
359 ctx->codepoint = lxb_encoding_multi_index_big5[index].codepoint;
362 continue;
363 }
364
366 }
367
368 return LXB_STATUS_OK;
369}
370
373 const lxb_char_t **data, const lxb_char_t *end)
374{
375 bool is_jis0212;
376 lxb_char_t byte, lead;
377
378 ctx->status = LXB_STATUS_OK;
379
380 if (ctx->u.euc_jp.lead != 0x00) {
381 if (ctx->have_error) {
382 ctx->have_error = false;
383 ctx->u.euc_jp.lead = 0x00;
384
386 ctx->have_error = true;
387 ctx->u.euc_jp.lead = 0x01;
389 }
390 else {
391 if (*data >= end) {
393
394 return LXB_STATUS_CONTINUE;
395 }
396
398
399 lead = ctx->u.euc_jp.lead;
400 byte = *(*data)++;
401
402 ctx->u.euc_jp.lead = 0x00;
403
404 if (ctx->u.euc_jp.is_jis0212) {
405 is_jis0212 = true;
406 ctx->u.euc_jp.is_jis0212 = false;
407
408 goto lead_jis_state;
409 }
410
411 goto lead_state;
412 }
413 }
414
415 while (*data < end) {
417
418 lead = *(*data)++;
419
420 if (lead < 0x80) {
422 continue;
423 }
424
425 if ((unsigned) (lead - 0xA1) > (0xFE - 0xA1)
426 && (lead != 0x8E && lead != 0x8F))
427 {
429 (*data)--;
430 }
432
433 continue;
434 }
435
436 if (*data >= end) {
437 ctx->u.euc_jp.lead = lead;
439
440 return LXB_STATUS_CONTINUE;
441 }
442
443 byte = *(*data)++;
444
445 lead_state:
446
447 if (lead == 0x8E && (unsigned) (byte - 0xA1) <= (0xDF - 0xA1)) {
448 LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0xFF61 - 0xA1 + byte);
449 continue;
450 }
451
452 is_jis0212 = false;
453
454 if (lead == 0x8F && (unsigned) (byte - 0xA1) <= (0xFE - 0xA1)) {
455 if (*data >= end) {
456 ctx->u.euc_jp.lead = byte;
457 ctx->u.euc_jp.is_jis0212 = true;
458
460
461 return LXB_STATUS_CONTINUE;
462 }
463
464 lead = byte;
465 byte = *(*data)++;
466 is_jis0212 = true;
467 }
468
469 lead_jis_state:
470
471 if ((unsigned) (lead - 0xA1) > (0xFE - 0xA1)
472 || (unsigned) (byte - 0xA1) > (0xFE - 0xA1))
473 {
475 continue;
476 }
477
478 /* Max index == (0xFE - 0xA1) * 94 + 0xFE - 0xA1 == 8835 */
479 ctx->codepoint = (lead - 0xA1) * 94 + byte - 0xA1;
480
481 if (is_jis0212) {
483 / sizeof(lxb_encoding_multi_index_t)) <= ctx->codepoint)
484 {
486 continue;
487 }
488
490 }
491 else {
493 / sizeof(lxb_encoding_multi_index_t)) <= ctx->codepoint)
494 {
496 continue;
497 }
498
500 }
501
504 continue;
505 }
506
508 }
509
510 return LXB_STATUS_OK;
511}
512
515 const lxb_char_t **data, const lxb_char_t *end)
516{
517 lxb_char_t lead, byte;
518
519 ctx->status = LXB_STATUS_OK;
520
521 if (ctx->u.lead != 0x00) {
522 if (ctx->have_error) {
523 ctx->have_error = false;
524 ctx->u.lead = 0x00;
525
527 ctx->have_error = true;
528 ctx->u.lead = 0x01;
530 }
531 else {
532 if (*data >= end) {
534
535 return LXB_STATUS_CONTINUE;
536 }
537
539
540 lead = (lxb_char_t) ctx->u.lead;
541 ctx->u.lead = 0x00;
542
543 goto lead_state;
544 }
545 }
546
547 while (*data < end) {
549
550 lead = *(*data)++;
551
552 if (lead < 0x80) {
554 continue;
555 }
556
557 if ((unsigned) (lead - 0x81) > (0xFE - 0x81)) {
559 (*data)--;
560 }
562
563 continue;
564 }
565
566 if (*data == end) {
567 ctx->u.lead = lead;
569
570 return LXB_STATUS_CONTINUE;
571 }
572
573 lead_state:
574
575 byte = *(*data)++;
576
577 if ((unsigned) (byte - 0x41) > (0xFE - 0x41)) {
579 continue;
580 }
581
582 /* Max index == (0xFE - 0x81) * 190 + (0xFE - 0x41) == 23939 */
583 ctx->codepoint = (lead - 0x81) * 190 + (byte - 0x41);
584
587 {
589 continue;
590 }
591
595 continue;
596 }
597
599 }
600
601 return LXB_STATUS_OK;
602}
603
610
619
622 const lxb_char_t **data, const lxb_char_t *end)
623{
624#define LXB_ENCODING_DECODE_ISO_2022_JP_OK() \
625 do { \
626 if (*data >= end) { \
627 return LXB_STATUS_OK; \
628 } \
629 } \
630 while (0)
631
632#define LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE() \
633 do { \
634 if (*data >= end) { \
635 ctx->status = LXB_STATUS_CONTINUE; \
636 return LXB_STATUS_CONTINUE; \
637 } \
638 } \
639 while (0)
640
641
642 lxb_char_t byte;
644
645 ctx->status = LXB_STATUS_OK;
646
647 if (ctx->have_error) {
648 ctx->have_error = false;
649
651 ctx->have_error = true;
652 }
654 }
655
656 if (iso->prepand != 0x00) {
657 if (*data >= end) {
659
660 return LXB_STATUS_CONTINUE;
661 }
662
664
665 byte = iso->prepand;
666 iso->prepand = 0x00;
667
668 goto prepand;
669 }
670
671 if (*data >= end) {
672 return LXB_STATUS_OK;
673 }
674
675 do {
677
678 byte = *(*data)++;
679
680 prepand:
681
682 switch (iso->state) {
684 if (byte == 0x1B) {
686
688 break;
689 }
690
691 /* 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B */
692 if ((unsigned) (byte - 0x00) <= (0x7F - 0x00)
693 && byte != 0x0E && byte != 0x0F)
694 {
695 iso->out_flag = false;
696
699 break;
700 }
701
702 iso->out_flag = false;
703
705 ctx->have_error = true;
706 }
708
710 break;
711
713 switch (byte) {
714 case 0x1B:
716
718 continue;
719
720 case 0x5C:
721 iso->out_flag = false;
722
725
726 continue;
727
728 case 0x7E:
729 iso->out_flag = false;
730
733
734 continue;
735
736 case 0x0E:
737 case 0x0F:
738 break;
739
740 default:
741 /* 0x00 to 0x7F */
742 if ((unsigned) (byte - 0x00) <= (0x7F - 0x00)) {
743 iso->out_flag = false;
744
747
748 continue;
749 }
750
751 break;
752 }
753
754 iso->out_flag = false;
755
757 ctx->have_error = true;
758 }
760
762 break;
763
765 if (byte == 0x1B) {
767
769 break;
770 }
771
772 /* 0x21 to 0x5F */
773 if ((unsigned) (byte - 0x21) <= (0x5F - 0x21)) {
774 iso->out_flag = false;
775
777 0xFF61 - 0x21 + byte);
779 break;
780 }
781
782 iso->out_flag = false;
783
785 ctx->have_error = true;
786 }
788
790 break;
791
793 if (byte == 0x1B) {
795
797 break;
798 }
799
800 /* 0x21 to 0x7E */
801 if ((unsigned) (byte - 0x21) <= (0x7E - 0x21)) {
802 iso->out_flag = false;
803 iso->lead = byte;
805
807 break;
808 }
809
810 iso->out_flag = false;
811
813 ctx->have_error = true;
814 }
816
818 break;
819
821 if (byte == 0x1B) {
823
825 ctx->have_error = true;
826 }
828
830 break;
831 }
832
834
835 /* 0x21 to 0x7E */
836 if ((unsigned) (byte - 0x21) <= (0x7E - 0x21)) {
837 /* Max index == (0x7E - 0x21) * 94 + 0x7E - 0x21 == 8835 */
838 ctx->codepoint = (iso->lead - 0x21) * 94 + byte - 0x21;
839
841
845
846 break;
847 }
848 }
849
851 iso->prepand = 0x01;
852 ctx->have_error = true;
853 }
855
857 break;
858
860 if (byte == 0x24 || byte == 0x28) {
862 iso->lead = byte;
863
865 break;
866 }
867
868 (*data)--;
869
870 iso->out_flag = false;
871 iso->state = ctx->u.iso_2022_jp.out_state;
872
874 iso->prepand = 0x01;
875 ctx->have_error = true;
876 }
878
879 break;
880
883
884 if (iso->lead == 0x28) {
885 if (byte == 0x42) {
887 }
888 else if (byte == 0x4A) {
890 }
891 else if (byte == 0x49) {
893 }
894 }
895 else if (iso->lead == 0x24) {
896 if (byte == 0x40 || byte == 0x42) {
898 }
899 }
900
902 (*data)--;
903
904 iso->out_flag = false;
905 iso->state = iso->out_state;
906
908 iso->prepand = iso->lead;
909 iso->lead = 0x00;
910
911 ctx->have_error = true;
912 }
914
915 byte = iso->lead;
916 iso->lead = 0x00;
917
918 goto prepand;
919 }
920
921 iso->lead = 0x00;
922 iso->out_state = iso->state;
923
924 if (iso->out_flag) {
926 ctx->have_error = true;
927 }
929
931 break;
932 }
933
934 iso->out_flag = true;
935
937 break;
938 }
939 }
940 while (true);
941
942 return LXB_STATUS_OK;
943
944#undef LXB_ENCODING_DECODE_ISO_2022_JP_OK
945#undef LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE
946}
947
956
965
974
983
992
1001
1010
1019
1028
1037
1046
1055
1064
1073
1082
1085 const lxb_char_t **data, const lxb_char_t *end)
1086{
1087 lxb_char_t byte, lead;
1088
1089 ctx->status = LXB_STATUS_OK;
1090
1091 if (ctx->u.lead != 0x00) {
1092 if (ctx->have_error) {
1093 ctx->have_error = false;
1094 ctx->u.lead = 0x00;
1095
1097 ctx->have_error = true;
1098 ctx->u.lead = 0x01;
1100 }
1101 else {
1102 if (*data >= end) {
1104
1105 return LXB_STATUS_CONTINUE;
1106 }
1107
1109
1110 lead = (lxb_char_t) ctx->u.lead;
1111 ctx->u.lead = 0x00;
1112
1113 goto lead_state;
1114 }
1115 }
1116
1117 while (*data < end) {
1119
1120 lead = *(*data)++;
1121
1122 if (lead <= 0x80) {
1124 continue;
1125 }
1126
1127 if ((unsigned) (lead - 0xA1) <= (0xDF - 0xA1)) {
1128 LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0xFF61 - 0xA1 + lead);
1129 continue;
1130 }
1131
1132 if ((unsigned) (lead - 0x81) > (0x9F - 0x81)
1133 && lead != 0xE0 && lead != 0xFC)
1134 {
1136 (*data)--;
1137 }
1139
1140 continue;
1141 }
1142
1143 if (*data >= end) {
1144 ctx->u.lead = lead;
1146
1147 return LXB_STATUS_CONTINUE;
1148 }
1149
1150 lead_state:
1151
1152 byte = *(*data)++;
1153
1154 if (byte < 0x7F) {
1155 ctx->codepoint = 0x40;
1156 }
1157 else {
1158 ctx->codepoint = 0x41;
1159 }
1160
1161 if (lead < 0xA0) {
1162 ctx->second_codepoint = 0x81;
1163 }
1164 else {
1165 ctx->second_codepoint = 0xC1;
1166 }
1167
1168 if ((unsigned) (byte - 0x40) > (0x7E - 0x40)
1169 && (unsigned) (byte - 0x80) > (0xFC - 0x80))
1170 {
1172 continue;
1173 }
1174
1175 /* Max index == (0xFC - 0xC1) * 188 + 0xFC - 0x41 = 11279 */
1176 ctx->codepoint = (lead - ctx->second_codepoint) * 188
1177 + byte - ctx->codepoint;
1178
1179 if (ctx->codepoint >= (sizeof(lxb_encoding_multi_index_jis0208)
1180 / sizeof(lxb_encoding_multi_index_t)))
1181 {
1183 continue;
1184 }
1185
1186 if ((unsigned) (ctx->codepoint - 8836) <= (10715 - 8836)) {
1187 LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, 0xE000 - 8836 + ctx->codepoint);
1188 continue;
1189 }
1190
1194 continue;
1195 }
1196
1198 }
1199
1200 return LXB_STATUS_OK;
1201}
1202
1205 const lxb_char_t **data, const lxb_char_t *end)
1206{
1207 unsigned lead;
1208 lxb_codepoint_t unit;
1209
1210 ctx->status = LXB_STATUS_OK;
1211
1212 if (ctx->have_error) {
1213 ctx->have_error = false;
1214
1216 ctx->have_error = true;
1217 }
1219 }
1220
1221 if (ctx->u.lead != 0x00) {
1222 if (*data >= end) {
1224
1225 return LXB_STATUS_CONTINUE;
1226 }
1227
1229
1230 lead = ctx->u.lead - 0x01;
1231 ctx->u.lead = 0x00;
1232
1233 goto lead_state;
1234 }
1235
1236 while (*data < end) {
1238
1239 pair_state:
1240
1241 lead = *(*data)++;
1242
1243 if (*data >= end) {
1244 ctx->u.lead = lead + 0x01;
1246
1247 return LXB_STATUS_CONTINUE;
1248 }
1249
1250 lead_state:
1251
1252 /* For UTF-16BE or UTF-16LE */
1253 if (is_be) {
1254 unit = (lead << 8) + *(*data)++;
1255 }
1256 else {
1257 unit = (*(*data)++ << 8) + lead;
1258 }
1259
1260 if (ctx->second_codepoint != 0x00) {
1261 if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
1262 ctx->codepoint = 0x10000 + ((ctx->second_codepoint - 0xD800) << 10)
1263 + (unit - 0xDC00);
1264
1265 ctx->second_codepoint = 0x00;
1266
1268 continue;
1269 }
1270
1271 (*data)--;
1272
1273 ctx->second_codepoint = 0x00;
1274
1276 ctx->have_error = true;
1277
1278 ctx->u.lead = lead + 0x01;
1279 }
1281
1282 goto lead_state;
1283 }
1284
1285 /* Surrogate pair */
1286 if ((unsigned) (unit - 0xD800) <= (0xDFFF - 0xD800)) {
1287 if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
1289 ctx->have_error = true;
1290 }
1292
1293 continue;
1294 }
1295
1296 ctx->second_codepoint = unit;
1297
1298 if (*data >= end) {
1300
1301 return LXB_STATUS_CONTINUE;
1302 }
1303
1304 goto pair_state;
1305 }
1306
1308 }
1309
1310 return LXB_STATUS_OK;
1311}
1312
1319
1322 const lxb_char_t **data, const lxb_char_t *end)
1323{
1324 return lxb_encoding_decode_utf_16(ctx, false, data, end);
1325}
1326
1329 const lxb_char_t **data, const lxb_char_t *end)
1330{
1331 unsigned need;
1332 lxb_char_t ch;
1333 const lxb_char_t *p = *data;
1334
1335 ctx->status = LXB_STATUS_OK;
1336
1337 if (ctx->have_error) {
1338 ctx->have_error = false;
1339
1341 ctx->have_error = true;
1342 }
1344 }
1345
1346 if (ctx->u.utf_8.need != 0) {
1347 if (p >= end) {
1349
1350 return LXB_STATUS_CONTINUE;
1351 }
1352
1354
1355 need = ctx->u.utf_8.need;
1356 ctx->u.utf_8.need = 0;
1357
1358 if (ctx->u.utf_8.lower != 0x00) {
1360 ctx->u.utf_8.upper, goto begin);
1361 ctx->u.utf_8.lower = 0x00;
1362 }
1363
1364 goto decode;
1365 }
1366
1367begin:
1368
1369 while (p < end) {
1370 if (ctx->buffer_used >= ctx->buffer_length) {
1371 *data = p;
1372
1374 }
1375
1376 ch = *p++;
1377
1378 if (ch < 0x80) {
1380 continue;
1381 }
1382 else if (ch <= 0xDF) {
1383 if (ch < 0xC2) {
1385 *data = p - 1;
1386 }
1388
1389 continue;
1390 }
1391
1392 need = 1;
1393 ctx->codepoint = ch & 0x1F;
1394 }
1395 else if (ch < 0xF0) {
1396 need = 2;
1397 ctx->codepoint = ch & 0x0F;
1398
1399 if (p == end) {
1400 LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET(0xE0, 0xED, 0xA0, 0x9F);
1401
1402 *data = p;
1403
1404 ctx->u.utf_8.need = need;
1406
1407 return LXB_STATUS_CONTINUE;
1408 }
1409
1410 if (ch == 0xE0) {
1411 LXB_ENCODING_DECODE_UTF_8_BOUNDARY(0xA0, 0xBF, continue);
1412 }
1413 else if (ch == 0xED) {
1414 LXB_ENCODING_DECODE_UTF_8_BOUNDARY(0x80, 0x9F, continue);
1415 }
1416 }
1417 else if (ch < 0xF5) {
1418 need = 3;
1419 ctx->codepoint = ch & 0x07;
1420
1421 if (p == end) {
1422 LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET(0xF0, 0xF4, 0x90, 0x8F);
1423
1424 *data = p;
1425
1426 ctx->u.utf_8.need = need;
1428
1429 return LXB_STATUS_CONTINUE;
1430 }
1431
1432 if (ch == 0xF0) {
1433 LXB_ENCODING_DECODE_UTF_8_BOUNDARY(0x90, 0xBF, continue);
1434 }
1435 else if (ch == 0xF4) {
1436 LXB_ENCODING_DECODE_UTF_8_BOUNDARY(0x80, 0x8F, continue);
1437 }
1438 }
1439 else {
1441 *data = p - 1;
1442 }
1444
1445 continue;
1446 }
1447
1448 decode:
1449
1450 do {
1451 if (p >= end) {
1452 *data = p;
1453
1454 ctx->u.utf_8.need = need;
1456
1457 return LXB_STATUS_CONTINUE;
1458 }
1459
1460 ch = *p++;
1461
1462 if (ch < 0x80 || ch > 0xBF) {
1463 p--;
1464
1465 ctx->u.utf_8.need = 0;
1466
1468 *data = p;
1469 ctx->have_error = true;
1470 }
1472
1473 break;
1474 }
1475
1476 ctx->codepoint = (ctx->codepoint << 6) | (ch & 0x3F);
1477
1478 if (--need == 0) {
1480
1481 break;
1482 }
1483 }
1484 while (true);
1485 }
1486
1487 *data = p;
1488
1489 return LXB_STATUS_OK;
1490}
1491
1494{
1495 size_t mid, left, right;
1496 const lxb_encoding_range_index_t *range;
1497
1498 /*
1499 * Pointer greater than 39419 and less than 189000,
1500 * or pointer is greater than 1237575
1501 */
1502 if ((unsigned) (index - 39419) < (189000 - 39419)
1503 || index > 1237575)
1504 {
1506 }
1507
1508 if (index == 7457) {
1509 return 0xE7C7;
1510 }
1511
1512 left = 0;
1515
1516 /* Some compilers warn that mid may be used uninitialized */
1517 mid = 0;
1518
1519 while (left < right) {
1520 mid = left + (right - left) / 2;
1521
1522 if (range[mid].index < index) {
1523 left = mid + 1;
1524
1525 if (left < right && range[ left ].index > index) {
1526 break;
1527 }
1528 }
1529 else if (range[mid].index > index) {
1530 right = mid - 1;
1531
1532 if (right > 0 && range[right].index <= index) {
1533 mid = right;
1534 break;
1535 }
1536 }
1537 else {
1538 break;
1539 }
1540 }
1541
1542 return range[mid].codepoint + index - range[mid].index;
1543}
1544
1547 const lxb_char_t **data, const lxb_char_t *end)
1548{
1549 uint32_t pointer;
1550 lxb_char_t first, second, third, offset;
1551
1552 /* Make compiler happy */
1553 second = 0x00;
1554
1555 ctx->status = LXB_STATUS_OK;
1556
1557 if (ctx->have_error) {
1558 ctx->have_error = false;
1559
1561 ctx->have_error = true;
1562 }
1564 }
1565
1566 if (ctx->u.gb18030.first != 0) {
1567 if (*data >= end) {
1569
1570 return LXB_STATUS_CONTINUE;
1571 }
1572
1574
1575 if (ctx->u.gb18030.third != 0x00) {
1576 first = ctx->u.gb18030.first;
1577 second = ctx->u.gb18030.second;
1578 third = ctx->u.gb18030.third;
1579
1580 memset(&ctx->u.gb18030, 0, sizeof(lxb_encoding_ctx_gb18030_t));
1581
1582 if (ctx->prepend) {
1583 /* The first is always < 0x80 */
1585
1586 if (ctx->buffer_used == ctx->buffer_length) {
1587 ctx->u.gb18030.first = third;
1588
1590 }
1591
1592 first = third;
1593 ctx->prepend = false;
1594
1595 goto prepend_first;
1596 }
1597
1598 goto third_state;
1599 }
1600 else if (ctx->u.gb18030.second != 0x00) {
1601 first = ctx->u.gb18030.first;
1602 second = ctx->u.gb18030.second;
1603
1604 memset(&ctx->u.gb18030, 0, sizeof(lxb_encoding_ctx_gb18030_t));
1605
1606 goto second_state;
1607 }
1608
1609 first = ctx->u.gb18030.first;
1610 ctx->u.gb18030.first = 0x00;
1611
1612 if (ctx->prepend) {
1613 ctx->prepend = false;
1614 goto prepend_first;
1615 }
1616
1617 goto first_state;
1618 }
1619
1620 while (*data < end) {
1622
1623 first = *(*data)++;
1624
1625 prepend_first:
1626
1627 if (first < 0x80) {
1629 continue;
1630 }
1631
1632 if (first == 0x80) {
1634 continue;
1635 }
1636
1637 /* Range 0x81 to 0xFE, inclusive */
1638 if ((unsigned) (first - 0x81) > (0xFE - 0x81)) {
1640 (*data)--;
1641 }
1643
1644 continue;
1645 }
1646
1647 if (*data == end) {
1648 ctx->u.gb18030.first = first;
1650
1651 return LXB_STATUS_CONTINUE;
1652 }
1653
1654 /* First */
1655 first_state:
1656
1657 second = *(*data)++;
1658
1659 /* Range 0x30 to 0x39, inclusive */
1660 if ((unsigned) (second - 0x30) > (0x39 - 0x30)) {
1661 offset = (second < 0x7F) ? 0x40 : 0x41;
1662
1663 /* Range 0x40 to 0x7E, inclusive, or 0x80 to 0xFE, inclusive */
1664 if ((unsigned) (second - 0x40) <= (0x7E - 0x40)
1665 || (unsigned) (second - 0x80) <= (0xFE - 0x80))
1666 {
1667 pointer = (first - 0x81) * 190 + (second - offset);
1668 }
1669 else {
1670 if (second < 0x80) {
1671 (*data)--;
1672 }
1673
1675 ctx->have_error = true;
1676 }
1678
1679 continue;
1680 }
1681
1682 /* Max pointer value == (0xFE - 0x81) * 190 + (0xFE - 0x41) == 23939 */
1683 ctx->codepoint = lxb_encoding_multi_index_gb18030[pointer].codepoint;
1685 if (second < 0x80) {
1686 (*data)--;
1687 }
1688
1690 ctx->have_error = true;
1691 }
1693
1694 continue;
1695 }
1696
1698 continue;
1699 }
1700
1701 if (*data == end) {
1702 ctx->u.gb18030.first = first;
1703 ctx->u.gb18030.second = second;
1704
1706
1707 return LXB_STATUS_CONTINUE;
1708 }
1709
1710 /* Second */
1711 second_state:
1712
1713 third = *(*data)++;
1714
1715 /* Range 0x81 to 0xFE, inclusive */
1716 if ((unsigned) (third - 0x81) > (0xFE - 0x81)) {
1717 (*data)--;
1718
1720 ctx->prepend = true;
1721 ctx->have_error = true;
1722 ctx->u.gb18030.first = second;
1723 }
1725
1726 first = second;
1727
1728 goto prepend_first;
1729 }
1730
1731 if (*data == end) {
1732 ctx->u.gb18030.first = first;
1733 ctx->u.gb18030.second = second;
1734 ctx->u.gb18030.third = third;
1735
1737
1738 return LXB_STATUS_CONTINUE;
1739 }
1740
1741 /* Third */
1742 third_state:
1743
1744 /* Range 0x30 to 0x39, inclusive */
1745 if ((unsigned) (**data - 0x30) > (0x39 - 0x30)) {
1746 ctx->prepend = true;
1747
1749 ctx->prepend = true;
1750 ctx->have_error = true;
1751
1752 /* First is a fake for trigger */
1753 ctx->u.gb18030.first = 0x01;
1754 ctx->u.gb18030.second = second;
1755 ctx->u.gb18030.third = third;
1756 }
1758
1760
1761 if (ctx->buffer_used == ctx->buffer_length) {
1762 ctx->prepend = true;
1763 ctx->have_error = true;
1764
1765 /* First is a fake for trigger */
1766 ctx->u.gb18030.first = 0x01;
1767 ctx->u.gb18030.second = second;
1768 ctx->u.gb18030.third = third;
1769
1771 }
1772
1773 first = third;
1774
1775 goto prepend_first;
1776 }
1777
1778 pointer = ((first - 0x81) * (10 * 126 * 10))
1779 + ((second - 0x30) * (10 * 126))
1780 + ((third - 0x81) * 10) + (*(*data)++) - 0x30;
1781
1783
1787
1788 continue;
1789 }
1790
1792 }
1793
1794 return LXB_STATUS_OK;
1795}
1796
1805
1813
1822
1831
1840
1849
1858
1867
1876
1885
1894
1903
1912
1915 const lxb_char_t **data, const lxb_char_t *end)
1916{
1917 while (*data < end) {
1918 if (**data < 0x80) {
1919 LXB_ENCODING_DECODE_APPEND(ctx, *(*data)++);
1920 }
1921 else {
1922 LXB_ENCODING_DECODE_APPEND(ctx, 0xF780 + (*(*data)++) - 0x80);
1923 }
1924 }
1925
1926 return LXB_STATUS_OK;
1927}
1928
1929/*
1930 * Single
1931 */
1938
1945
1952
1955 const lxb_char_t **data, const lxb_char_t *end)
1956{
1957 uint32_t index;
1958 lxb_char_t lead, byte;
1959
1960 if (ctx->u.lead != 0x00) {
1961 if (ctx->second_codepoint != 0x00) {
1962 (*data)++;
1963
1964 ctx->u.lead = 0x00;
1965
1966 ctx->codepoint = ctx->second_codepoint;
1967 ctx->second_codepoint = 0x00;
1968
1969 return ctx->codepoint;
1970 }
1971
1972 lead = (lxb_char_t) ctx->u.lead;
1973 ctx->u.lead = 0x00;
1974
1975 goto lead_state;
1976 }
1977
1978 lead = *(*data)++;
1979
1980 if (lead < 0x80) {
1981 return lead;
1982 }
1983
1984 if ((unsigned) (lead - 0x81) > (0xFE - 0x81)) {
1986 }
1987
1988 if (*data >= end) {
1989 ctx->u.lead = lead;
1990
1992 }
1993
1994lead_state:
1995
1996 index = 0;
1997 byte = **data;
1998
1999 if ((unsigned) (byte - 0x40) <= (0x7E - 0x40)
2000 || (unsigned) (byte - 0xA1) <= (0xFE - 0xA1))
2001 {
2002 if (byte < 0x7F) {
2003 /* Max index == (0xFE - 0x81) * 157 + (0x7E - 0x40) == 19687 */
2004 index = (lead - 0x81) * 157 + (byte - 0x40);
2005 }
2006 else {
2007 /* Max index == (0xFE - 0x81) * 157 + (0xFE - 0x62) == 19781 */
2008 index = (lead - 0x81) * 157 + (byte - 0x62);
2009 }
2010 }
2011
2012 /*
2013 * 1133 U+00CA U+0304 Ê̄ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON)
2014 * 1135 U+00CA U+030C Ê̌ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON)
2015 * 1164 U+00EA U+0304 ê̄ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON)
2016 * 1166 U+00EA U+030C ê̌ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON)
2017 */
2018 switch (index) {
2019 case 1133:
2020 ctx->u.lead = lead;
2021 ctx->second_codepoint = 0x0304;
2022 return 0x00CA;
2023
2024 case 1135:
2025 ctx->u.lead = lead;
2026 ctx->second_codepoint = 0x030C;
2027 return 0x00CA;
2028
2029 case 1164:
2030 ctx->u.lead = lead;
2031 ctx->second_codepoint = 0x0304;
2032 return 0x00EA;
2033
2034 case 1166:
2035 ctx->u.lead = lead;
2036 ctx->second_codepoint = 0x030C;
2037 return 0x00EA;
2038
2039 case 0:
2040 goto failed;
2041 }
2042
2043 ctx->codepoint = lxb_encoding_multi_index_big5[index].codepoint;
2045 goto failed;
2046 }
2047
2048 (*data)++;
2049
2050 return ctx->codepoint;
2051
2052failed:
2053
2054 if (byte >= 0x80) {
2055 (*data)++;
2056 }
2057
2059}
2060
2063 const lxb_char_t **data, const lxb_char_t *end)
2064{
2065 bool is_jis0212;
2066 lxb_char_t byte, lead;
2067
2068 if (ctx->u.euc_jp.lead != 0x00) {
2069 lead = ctx->u.euc_jp.lead;
2070 byte = *(*data)++;
2071
2072 ctx->u.euc_jp.lead = 0x00;
2073
2074 if (ctx->u.euc_jp.is_jis0212) {
2075 is_jis0212 = true;
2076 ctx->u.euc_jp.is_jis0212 = false;
2077
2078 goto lead_jis_state;
2079 }
2080
2081 goto lead_state;
2082 }
2083
2084 lead = *(*data)++;
2085
2086 if (lead < 0x80) {
2087 return lead;
2088 }
2089
2090 if ((unsigned) (lead - 0xA1) > (0xFE - 0xA1)
2091 && (lead != 0x8E && lead != 0x8F))
2092 {
2094 }
2095
2096 if (*data >= end) {
2097 ctx->u.euc_jp.lead = lead;
2099 }
2100
2101 byte = *(*data)++;
2102
2103lead_state:
2104
2105 if (lead == 0x8E && (unsigned) (byte - 0xA1) <= (0xDF - 0xA1)) {
2106 return 0xFF61 - 0xA1 + byte;
2107 }
2108
2109 is_jis0212 = false;
2110
2111 if (lead == 0x8F && (unsigned) (byte - 0xA1) <= (0xFE - 0xA1)) {
2112 if (*data >= end) {
2113 ctx->u.euc_jp.lead = byte;
2114 ctx->u.euc_jp.is_jis0212 = true;
2115
2117 }
2118
2119 lead = byte;
2120 byte = *(*data)++;
2121 is_jis0212 = true;
2122 }
2123
2124lead_jis_state:
2125
2126 if ((unsigned) (lead - 0xA1) > (0xFE - 0xA1)
2127 || (unsigned) (byte - 0xA1) > (0xFE - 0xA1))
2128 {
2129 goto failed;
2130 }
2131
2132 /* Max index == (0xFE - 0xA1) * 94 + 0xFE - 0xA1 == 8835 */
2133 ctx->codepoint = (lead - 0xA1) * 94 + byte - 0xA1;
2134
2135 if (is_jis0212) {
2137 / sizeof(lxb_encoding_multi_index_t)) <= ctx->codepoint)
2138 {
2139 goto failed;
2140 }
2141
2143 }
2144 else {
2146 / sizeof(lxb_encoding_multi_index_t)) <= ctx->codepoint)
2147 {
2148 goto failed;
2149 }
2150
2152 }
2153
2155 goto failed;
2156 }
2157
2158 return ctx->codepoint;
2159
2160failed:
2161
2162 if (byte < 0x80) {
2163 (*data)--;
2164 }
2165
2167}
2168
2171 const lxb_char_t **data, const lxb_char_t *end)
2172{
2173 lxb_char_t lead, byte;
2174
2175 if (ctx->u.lead != 0x00) {
2176 lead = (lxb_char_t) ctx->u.lead;
2177 ctx->u.lead = 0x00;
2178
2179 goto lead_state;
2180 }
2181
2182 lead = *(*data)++;
2183
2184 if (lead < 0x80) {
2185 return lead;
2186 }
2187
2188 if ((unsigned) (lead - 0x81) > (0xFE - 0x81)) {
2190 }
2191
2192 if (*data == end) {
2193 ctx->u.lead = lead;
2195 }
2196
2197lead_state:
2198
2199 byte = *(*data)++;
2200
2201 if ((unsigned) (byte - 0x41) > (0xFE - 0x41)) {
2202 goto failed;
2203 }
2204
2205 /* Max index == (0xFE - 0x81) * 190 + (0xFE - 0x41) == 23939 */
2206 ctx->codepoint = (lead - 0x81) * 190 + (byte - 0x41);
2207
2208 if (ctx->codepoint >= sizeof(lxb_encoding_multi_index_euc_kr)
2210 {
2211 goto failed;
2212 }
2213
2216 goto failed;
2217 }
2218
2219 return ctx->codepoint;
2220
2221failed:
2222
2223 if (byte < 0x80) {
2224 (*data)--;
2225 }
2226
2228}
2229
2236
2239 const lxb_char_t **data, const lxb_char_t *end)
2240{
2241 if (**data < 0x80) {
2242 return *(*data)++;
2243 }
2244
2245 return lxb_encoding_single_index_ibm866[*(*data)++ - 0x80].codepoint;
2246}
2247
2250 const lxb_char_t **data, const lxb_char_t *end)
2251{
2252 lxb_char_t byte;
2254
2255 if (iso->prepand != 0x00) {
2256 byte = iso->prepand;
2257 iso->prepand = 0x00;
2258
2259 goto prepand;
2260 }
2261
2262 do {
2263 byte = *(*data)++;
2264
2265 prepand:
2266
2267 switch (iso->state) {
2269 if (byte == 0x1B) {
2271
2272 break;
2273 }
2274
2275 /* 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B */
2276 if ((unsigned) (byte - 0x00) <= (0x7F - 0x00)
2277 && byte != 0x0E && byte != 0x0F)
2278 {
2279 iso->out_flag = false;
2280
2281 return byte;
2282 }
2283
2284 iso->out_flag = false;
2285
2287
2289 switch (byte) {
2290 case 0x1B:
2292
2293 continue;
2294
2295 case 0x5C:
2296 iso->out_flag = false;
2297
2298 return 0x00A5;
2299
2300 case 0x7E:
2301 iso->out_flag = false;
2302
2303 return 0x203E;
2304
2305 case 0x0E:
2306 case 0x0F:
2307 break;
2308
2309 default:
2310 /* 0x00 to 0x7F */
2311 if ((unsigned) (byte - 0x00) <= (0x7F - 0x00)) {
2312 iso->out_flag = false;
2313
2314 return byte;
2315 }
2316
2317 break;
2318 }
2319
2320 iso->out_flag = false;
2321
2323
2325 if (byte == 0x1B) {
2327
2328 break;
2329 }
2330
2331 /* 0x21 to 0x5F */
2332 if ((unsigned) (byte - 0x21) <= (0x5F - 0x21)) {
2333 iso->out_flag = false;
2334
2335 return 0xFF61 - 0x21 + byte;
2336 }
2337
2338 iso->out_flag = false;
2339
2341
2343 if (byte == 0x1B) {
2345
2346 break;
2347 }
2348
2349 /* 0x21 to 0x7E */
2350 if ((unsigned) (byte - 0x21) <= (0x7E - 0x21)) {
2351 iso->out_flag = false;
2352 iso->lead = byte;
2354
2355 break;
2356 }
2357
2358 iso->out_flag = false;
2359
2361
2363 if (byte == 0x1B) {
2365
2367 }
2368
2370
2371 /* 0x21 to 0x7E */
2372 if ((unsigned) (byte - 0x21) <= (0x7E - 0x21)) {
2373 /* Max index == (0x7E - 0x21) * 94 + 0x7E - 0x21 == 8835 */
2374 ctx->codepoint = (iso->lead - 0x21) * 94 + byte - 0x21;
2375
2376 return lxb_encoding_multi_index_jis0208[ctx->codepoint].codepoint;
2377 }
2378
2380
2382 if (byte == 0x24 || byte == 0x28) {
2384 iso->lead = byte;
2385
2386 break;
2387 }
2388
2389 (*data)--;
2390
2391 iso->out_flag = false;
2392 iso->state = ctx->u.iso_2022_jp.out_state;
2393
2395
2398
2399 if (iso->lead == 0x28) {
2400 if (byte == 0x42) {
2402 }
2403 else if (byte == 0x4A) {
2405 }
2406 else if (byte == 0x49) {
2408 }
2409 }
2410 else if (iso->lead == 0x24) {
2411 if (byte == 0x40 || byte == 0x42) {
2413 }
2414 }
2415
2417 iso->prepand = iso->lead;
2418 iso->lead = 0x00;
2419
2420 (*data)--;
2421
2422 iso->out_flag = false;
2423 iso->state = iso->out_state;
2424
2426 }
2427
2428 iso->lead = 0x00;
2429 iso->out_state = iso->state;
2430
2431 if (iso->out_flag) {
2433 }
2434
2435 iso->out_flag = true;
2436
2437 break;
2438 }
2439 }
2440 while (*data < end);
2441
2443}
2444
2447 const lxb_char_t **data, const lxb_char_t *end)
2448{
2449 if (**data < 0x80) {
2450 return *(*data)++;
2451 }
2452
2453 return lxb_encoding_single_index_iso_8859_10[*(*data)++ - 0x80].codepoint;
2454}
2455
2458 const lxb_char_t **data, const lxb_char_t *end)
2459{
2460 if (**data < 0x80) {
2461 return *(*data)++;
2462 }
2463
2464 return lxb_encoding_single_index_iso_8859_13[*(*data)++ - 0x80].codepoint;
2465}
2466
2469 const lxb_char_t **data, const lxb_char_t *end)
2470{
2471 if (**data < 0x80) {
2472 return *(*data)++;
2473 }
2474
2475 return lxb_encoding_single_index_iso_8859_14[*(*data)++ - 0x80].codepoint;
2476}
2477
2480 const lxb_char_t **data, const lxb_char_t *end)
2481{
2482 if (**data < 0x80) {
2483 return *(*data)++;
2484 }
2485
2486 return lxb_encoding_single_index_iso_8859_15[*(*data)++ - 0x80].codepoint;
2487}
2488
2491 const lxb_char_t **data, const lxb_char_t *end)
2492{
2493 if (**data < 0x80) {
2494 return *(*data)++;
2495 }
2496
2497 return lxb_encoding_single_index_iso_8859_16[*(*data)++ - 0x80].codepoint;
2498}
2499
2502 const lxb_char_t **data, const lxb_char_t *end)
2503{
2504 if (**data < 0x80) {
2505 return *(*data)++;
2506 }
2507
2508 return lxb_encoding_single_index_iso_8859_2[*(*data)++ - 0x80].codepoint;
2509}
2510
2513 const lxb_char_t **data, const lxb_char_t *end)
2514{
2515 if (**data < 0x80) {
2516 return *(*data)++;
2517 }
2518
2519 return lxb_encoding_single_index_iso_8859_3[*(*data)++ - 0x80].codepoint;
2520}
2521
2524 const lxb_char_t **data, const lxb_char_t *end)
2525{
2526 if (**data < 0x80) {
2527 return *(*data)++;
2528 }
2529
2530 return lxb_encoding_single_index_iso_8859_4[*(*data)++ - 0x80].codepoint;
2531}
2532
2535 const lxb_char_t **data, const lxb_char_t *end)
2536{
2537 if (**data < 0x80) {
2538 return *(*data)++;
2539 }
2540
2541 return lxb_encoding_single_index_iso_8859_5[*(*data)++ - 0x80].codepoint;
2542}
2543
2546 const lxb_char_t **data, const lxb_char_t *end)
2547{
2548 if (**data < 0x80) {
2549 return *(*data)++;
2550 }
2551
2552 return lxb_encoding_single_index_iso_8859_6[*(*data)++ - 0x80].codepoint;
2553}
2554
2557 const lxb_char_t **data, const lxb_char_t *end)
2558{
2559 if (**data < 0x80) {
2560 return *(*data)++;
2561 }
2562
2563 return lxb_encoding_single_index_iso_8859_7[*(*data)++ - 0x80].codepoint;
2564}
2565
2568 const lxb_char_t **data, const lxb_char_t *end)
2569{
2570 if (**data < 0x80) {
2571 return *(*data)++;
2572 }
2573
2574 return lxb_encoding_single_index_iso_8859_8[*(*data)++ - 0x80].codepoint;
2575}
2576
2579 const lxb_char_t **data, const lxb_char_t *end)
2580{
2581 if (**data < 0x80) {
2582 return *(*data)++;
2583 }
2584
2585 return lxb_encoding_single_index_iso_8859_8[*(*data)++ - 0x80].codepoint;
2586}
2587
2590 const lxb_char_t **data, const lxb_char_t *end)
2591{
2592 if (**data < 0x80) {
2593 return *(*data)++;
2594 }
2595
2596 return lxb_encoding_single_index_koi8_r[*(*data)++ - 0x80].codepoint;
2597}
2598
2601 const lxb_char_t **data, const lxb_char_t *end)
2602{
2603 if (**data < 0x80) {
2604 return *(*data)++;
2605 }
2606
2607 return lxb_encoding_single_index_koi8_u[*(*data)++ - 0x80].codepoint;
2608}
2609
2612 const lxb_char_t **data, const lxb_char_t *end)
2613{
2614 lxb_char_t byte, lead;
2615
2616 if (ctx->u.lead != 0x00) {
2617 lead = (lxb_char_t) ctx->u.lead;
2618 ctx->u.lead = 0x00;
2619
2620 goto lead_state;
2621 }
2622
2623 lead = *(*data)++;
2624
2625 if (lead <= 0x80) {
2626 return lead;
2627 }
2628
2629 if ((unsigned) (lead - 0xA1) <= (0xDF - 0xA1)) {
2630 return 0xFF61 - 0xA1 + lead;
2631 }
2632
2633 if ((unsigned) (lead - 0x81) > (0x9F - 0x81)
2634 && lead != 0xE0 && lead != 0xFC)
2635 {
2637 }
2638
2639 if (*data >= end) {
2640 ctx->u.lead = lead;
2641
2643 }
2644
2645lead_state:
2646
2647 byte = *(*data)++;
2648
2649 if (byte < 0x7F) {
2650 ctx->codepoint = 0x40;
2651 }
2652 else {
2653 ctx->codepoint = 0x41;
2654 }
2655
2656 if (lead < 0xA0) {
2657 ctx->second_codepoint = 0x81;
2658 }
2659 else {
2660 ctx->second_codepoint = 0xC1;
2661 }
2662
2663 if ((unsigned) (byte - 0x40) <= (0x7E - 0x40)
2664 || (unsigned) (byte - 0x80) <= (0xFC - 0x80))
2665 {
2666 /* Max index == (0xFC - 0xC1) * 188 + 0xFC - 0x41 = 11279 */
2667 ctx->codepoint = (lead - ctx->second_codepoint) * 188
2668 + byte - ctx->codepoint;
2669
2670 if (ctx->codepoint >= (sizeof(lxb_encoding_multi_index_jis0208)
2671 / sizeof(lxb_encoding_multi_index_t)))
2672 {
2673 goto failed;
2674 }
2675
2676 if ((unsigned) (ctx->codepoint - 8836) <= (10715 - 8836)) {
2677 return 0xE000 - 8836 + ctx->codepoint;
2678 }
2679
2682 goto failed;
2683 }
2684
2685 return ctx->codepoint;
2686 }
2687
2688failed:
2689
2690 if (byte < 0x80) {
2691 (*data)--;
2692 }
2693
2695}
2696
2699 const lxb_char_t **data, const lxb_char_t *end)
2700{
2701 unsigned lead;
2702 lxb_codepoint_t unit;
2703
2704 if (ctx->u.lead != 0x00) {
2705 lead = ctx->u.lead - 0x01;
2706 ctx->u.lead = 0x00;
2707
2708 goto lead_state;
2709 }
2710
2711pair_state:
2712
2713 lead = *(*data)++;
2714
2715 if (*data >= end) {
2716 ctx->u.lead = lead + 0x01;
2718 }
2719
2720lead_state:
2721
2722 /* For UTF-16BE or UTF-16LE */
2723 if (is_be) {
2724 unit = (lead << 8) + *(*data)++;
2725 }
2726 else {
2727 unit = (*(*data)++ << 8) + lead;
2728 }
2729
2730 if (ctx->second_codepoint != 0x00) {
2731 if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
2732 ctx->codepoint = 0x10000 + ((ctx->second_codepoint - 0xD800) << 10)
2733 + (unit - 0xDC00);
2734
2735 ctx->second_codepoint = 0x00;
2736 return ctx->codepoint;
2737 }
2738
2739 (*data)--;
2740
2741 ctx->u.lead = lead + 0x01;
2742 ctx->second_codepoint = 0x00;
2743
2745 }
2746
2747 /* Surrogate pair */
2748 if ((unsigned) (unit - 0xD800) <= (0xDFFF - 0xD800)) {
2749 if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
2751 }
2752
2753 ctx->second_codepoint = unit;
2754
2755 if (*data >= end) {
2757 }
2758
2759 goto pair_state;
2760 }
2761
2762 return unit;
2763}
2764
2771
2778
2781 const lxb_char_t **data, const lxb_char_t *end)
2782{
2783 unsigned needed;
2784 lxb_char_t ch;
2785 const lxb_char_t *p;
2786
2787 if (ctx->u.utf_8.need != 0) {
2788 needed = ctx->u.utf_8.need;
2789 ctx->u.utf_8.need = 0;
2790
2791 if (ctx->u.utf_8.lower != 0x00) {
2793 ctx->u.utf_8.upper);
2794 ctx->u.utf_8.lower = 0x00;
2795 }
2796
2797 goto decode;
2798 }
2799
2800 ch = *(*data)++;
2801
2802 if (ch < 0x80) {
2803 return ch;
2804 }
2805 else if (ch <= 0xDF) {
2806 if (ch < 0xC2) {
2808 }
2809
2810 needed = 1;
2811 ctx->codepoint = ch & 0x1F;
2812 }
2813 else if (ch < 0xF0) {
2814 needed = 2;
2815 ctx->codepoint = ch & 0x0F;
2816
2817 if (*data == end) {
2819 0xA0, 0x9F);
2820 goto next;
2821 }
2822
2823 if (ch == 0xE0) {
2825 }
2826 else if (ch == 0xED) {
2828 }
2829 }
2830 else if (ch < 0xF5) {
2831 needed = 3;
2832 ctx->codepoint = ch & 0x07;
2833
2834 if (*data == end) {
2836 0x90, 0x8F);
2837
2838 goto next;
2839 }
2840
2841 if (ch == 0xF0) {
2843 }
2844 else if (ch == 0xF4) {
2846 }
2847 }
2848 else {
2850 }
2851
2852decode:
2853
2854 for (p = *data; p < end; p++) {
2855 ch = *p;
2856
2857 if (ch < 0x80 || ch > 0xBF) {
2858 *data = p;
2859
2860 goto failed;
2861 }
2862
2863 ctx->codepoint = (ctx->codepoint << 6) | (ch & 0x3F);
2864
2865 if (--needed == 0) {
2866 *data = p + 1;
2867
2868 return ctx->codepoint;
2869 }
2870 }
2871
2872 *data = p;
2873
2874next:
2875
2876 ctx->u.utf_8.need = needed;
2877
2879
2880failed:
2881
2882 ctx->u.utf_8.lower = 0x00;
2883 ctx->u.utf_8.need = 0;
2884
2886}
2887
2890 const lxb_char_t *end)
2891{
2893 const lxb_char_t *p = *data;
2894
2895 if (*p < 0x80){
2896 /* 0xxxxxxx */
2897
2898 if (end - p < 1) {
2899 *data = end;
2901 }
2902
2903 cp = (lxb_codepoint_t) *p;
2904
2905 (*data) += 1;
2906 }
2907 else if ((*p & 0xe0) == 0xc0) {
2908 /* 110xxxxx 10xxxxxx */
2909
2910 if (end - p < 2) {
2911 *data = end;
2913 }
2914
2915 cp = (p[0] ^ (0xC0 & p[0])) << 6;
2916 cp |= (p[1] ^ (0x80 & p[1]));
2917
2918 (*data) += 2;
2919 }
2920 else if ((*p & 0xf0) == 0xe0) {
2921 /* 1110xxxx 10xxxxxx 10xxxxxx */
2922
2923 if (end - p < 3) {
2924 *data = end;
2926 }
2927
2928 cp = (p[0] ^ (0xE0 & p[0])) << 12;
2929 cp |= (p[1] ^ (0x80 & p[1])) << 6;
2930 cp |= (p[2] ^ (0x80 & p[2]));
2931
2932 (*data) += 3;
2933 }
2934 else if ((*p & 0xf8) == 0xf0) {
2935 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
2936
2937 if (end - p < 4) {
2938 *data = end;
2940 }
2941
2942 cp = (p[0] ^ (0xF0 & p[0])) << 18;
2943 cp |= (p[1] ^ (0x80 & p[1])) << 12;
2944 cp |= (p[2] ^ (0x80 & p[2])) << 6;
2945 cp |= (p[3] ^ (0x80 & p[3]));
2946
2947 (*data) += 4;
2948 }
2949 else {
2950 (*data)++;
2951
2953 }
2954
2955 return cp;
2956}
2957
2960 const lxb_char_t *begin)
2961{
2963 const lxb_char_t *p = *end;
2964
2965 while (p > begin) {
2966 p -= 1;
2967
2968 if (*p < 0x80){
2969 cp = (lxb_codepoint_t) *p;
2970
2971 (*end) = p;
2972 return cp;
2973 }
2974 else if ((*p & 0xe0) == 0xc0) {
2975 /* 110xxxxx 10xxxxxx */
2976
2977 if (*end - p < 2) {
2978 *end = p;
2980 }
2981
2982 cp = (p[0] ^ (0xC0 & p[0])) << 6;
2983 cp |= (p[1] ^ (0x80 & p[1]));
2984
2985 (*end) = p;
2986 return cp;
2987 }
2988 else if ((*p & 0xf0) == 0xe0) {
2989 /* 1110xxxx 10xxxxxx 10xxxxxx */
2990
2991 if (*end - p < 3) {
2992 *end = p;
2994 }
2995
2996 cp = (p[0] ^ (0xE0 & p[0])) << 12;
2997 cp |= (p[1] ^ (0x80 & p[1])) << 6;
2998 cp |= (p[2] ^ (0x80 & p[2]));
2999
3000 (*end) = p;
3001 return cp;
3002 }
3003 else if ((*p & 0xf8) == 0xf0) {
3004 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
3005
3006 if (*end - p < 4) {
3007 *end = p;
3009 }
3010
3011 cp = (p[0] ^ (0xF0 & p[0])) << 18;
3012 cp |= (p[1] ^ (0x80 & p[1])) << 12;
3013 cp |= (p[2] ^ (0x80 & p[2])) << 6;
3014 cp |= (p[3] ^ (0x80 & p[3]));
3015
3016 (*end) = p;
3017 return cp;
3018 }
3019 else if (*end - p >= 4) {
3020 break;
3021 }
3022 }
3023
3024 *end = p;
3025
3027}
3028
/*
 * Report the length, in bytes, of the UTF-8 sequence introduced by the
 * lead byte `data`.
 *
 * Only the high bits of the byte are inspected:
 *   0xxxxxxx -> 1
 *   110xxxxx -> 2
 *   1110xxxx -> 3
 *   11110xxx -> 4
 * Any other bit pattern yields 0.
 */
uint8_t
lxb_encoding_decode_utf_8_length(lxb_char_t data)
{
    if (data < 0x80) {
        return 1;
    }

    if ((data & 0xe0) == 0xc0) {
        return 2;
    }

    if ((data & 0xf0) == 0xe0) {
        return 3;
    }

    if ((data & 0xf8) == 0xf0) {
        return 4;
    }

    return 0;
}
3048
3051 const lxb_char_t **data, const lxb_char_t *end)
3052{
3053 uint32_t pointer;
3054 lxb_char_t first, second, third, offset;
3055
3056 /* Make compiler happy */
3057 second = 0x00;
3058
3059 if (ctx->u.gb18030.first != 0) {
3060 if (ctx->u.gb18030.third != 0x00) {
3061 first = ctx->u.gb18030.first;
3062 second = ctx->u.gb18030.second;
3063 third = ctx->u.gb18030.third;
3064
3065 memset(&ctx->u.gb18030, 0, sizeof(lxb_encoding_ctx_gb18030_t));
3066
3067 if (ctx->prepend) {
3068 /* The first is always < 0x80 */
3069 ctx->u.gb18030.first = third;
3070
3071 return second;
3072 }
3073
3074 goto third_state;
3075 }
3076 else if (ctx->u.gb18030.second != 0x00) {
3077 first = ctx->u.gb18030.first;
3078 second = ctx->u.gb18030.second;
3079
3080 memset(&ctx->u.gb18030, 0, sizeof(lxb_encoding_ctx_gb18030_t));
3081
3082 goto second_state;
3083 }
3084
3085 first = ctx->u.gb18030.first;
3086 ctx->u.gb18030.first = 0x00;
3087
3088 if (ctx->prepend) {
3089 ctx->prepend = false;
3090 goto prepend_first;
3091 }
3092
3093 goto first_state;
3094 }
3095
3096 first = *(*data)++;
3097
3098prepend_first:
3099
3100 if (first < 0x80) {
3101 return first;
3102 }
3103
3104 if (first == 0x80) {
3105 return 0x20AC;
3106 }
3107
3108 /* Range 0x81 to 0xFE, inclusive */
3109 if ((unsigned) (first - 0x81) > (0xFE - 0x81)) {
3111 }
3112
3113 if (*data == end) {
3114 ctx->u.gb18030.first = first;
3116 }
3117
3118 /* First */
3119first_state:
3120
3121 second = *(*data)++;
3122
3123 /* Range 0x30 to 0x39, inclusive */
3124 if ((unsigned) (second - 0x30) > (0x39 - 0x30)) {
3125 offset = (second < 0x7F) ? 0x40 : 0x41;
3126
3127 /* Range 0x40 to 0x7E, inclusive, or 0x80 to 0xFE, inclusive */
3128 if ((unsigned) (second - 0x40) <= (0x7E - 0x40)
3129 || (unsigned) (second - 0x80) <= (0xFE - 0x80))
3130 {
3131 pointer = (first - 0x81) * 190 + (second - offset);
3132 }
3133 else {
3134 goto failed;
3135 }
3136
3137 /* Max pointer value == (0xFE - 0x81) * 190 + (0xFE - 0x41) == 23939 */
3138 ctx->codepoint = lxb_encoding_multi_index_gb18030[pointer].codepoint;
3140 goto failed;
3141 }
3142
3143 return ctx->codepoint;
3144 }
3145
3146 if (*data == end) {
3147 ctx->u.gb18030.first = first;
3148 ctx->u.gb18030.second = second;
3149
3151 }
3152
3153 /* Second */
3154second_state:
3155
3156 third = *(*data)++;
3157
3158 /* Range 0x81 to 0xFE, inclusive */
3159 if ((unsigned) (third - 0x81) > (0xFE - 0x81)) {
3160 (*data)--;
3161
3162 ctx->prepend = true;
3163 ctx->u.gb18030.first = second;
3164
3166 }
3167
3168 if (*data == end) {
3169 ctx->u.gb18030.first = first;
3170 ctx->u.gb18030.second = second;
3171 ctx->u.gb18030.third = third;
3172
3174 }
3175
3176 /* Third */
3177third_state:
3178
3179 /* Range 0x30 to 0x39, inclusive */
3180 if ((unsigned) (**data - 0x30) > (0x39 - 0x30)) {
3181 ctx->prepend = true;
3182
3183 /* First is a fake for trigger */
3184 ctx->u.gb18030.first = 0x01;
3185 ctx->u.gb18030.second = second;
3186 ctx->u.gb18030.third = third;
3187
3189 }
3190
3191 pointer = ((first - 0x81) * (10 * 126 * 10))
3192 + ((second - 0x30) * (10 * 126))
3193 + ((third - 0x81) * 10) + (*(*data)++) - 0x30;
3194
3195 return lxb_encoding_decode_gb18030_range(pointer);
3196
3197failed:
3198
3199 if (second < 0x80) {
3200 (*data)--;
3201 }
3202
3204}
3205
3208 const lxb_char_t **data, const lxb_char_t *end)
3209{
3210 if (**data < 0x80) {
3211 return *(*data)++;
3212 }
3213
3214 return lxb_encoding_single_index_macintosh[*(*data)++ - 0x80].codepoint;
3215}
3216
3223
3226 const lxb_char_t **data, const lxb_char_t *end)
3227{
3228 if (**data < 0x80) {
3229 return *(*data)++;
3230 }
3231
3232 return lxb_encoding_single_index_windows_1250[*(*data)++ - 0x80].codepoint;
3233}
3234
3237 const lxb_char_t **data, const lxb_char_t *end)
3238{
3239 if (**data < 0x80) {
3240 return *(*data)++;
3241 }
3242
3243 return lxb_encoding_single_index_windows_1251[*(*data)++ - 0x80].codepoint;
3244}
3245
3248 const lxb_char_t **data, const lxb_char_t *end)
3249{
3250 if (**data < 0x80) {
3251 return *(*data)++;
3252 }
3253
3254 return lxb_encoding_single_index_windows_1252[*(*data)++ - 0x80].codepoint;
3255}
3256
3259 const lxb_char_t **data, const lxb_char_t *end)
3260{
3261 if (**data < 0x80) {
3262 return *(*data)++;
3263 }
3264
3265 return lxb_encoding_single_index_windows_1253[*(*data)++ - 0x80].codepoint;
3266}
3267
3270 const lxb_char_t **data, const lxb_char_t *end)
3271{
3272 if (**data < 0x80) {
3273 return *(*data)++;
3274 }
3275
3276 return lxb_encoding_single_index_windows_1254[*(*data)++ - 0x80].codepoint;
3277}
3278
3281 const lxb_char_t **data, const lxb_char_t *end)
3282{
3283 if (**data < 0x80) {
3284 return *(*data)++;
3285 }
3286
3287 return lxb_encoding_single_index_windows_1255[*(*data)++ - 0x80].codepoint;
3288}
3289
3292 const lxb_char_t **data, const lxb_char_t *end)
3293{
3294 if (**data < 0x80) {
3295 return *(*data)++;
3296 }
3297
3298 return lxb_encoding_single_index_windows_1256[*(*data)++ - 0x80].codepoint;
3299}
3300
3303 const lxb_char_t **data, const lxb_char_t *end)
3304{
3305 if (**data < 0x80) {
3306 return *(*data)++;
3307 }
3308
3309 return lxb_encoding_single_index_windows_1257[*(*data)++ - 0x80].codepoint;
3310}
3311
3314 const lxb_char_t **data, const lxb_char_t *end)
3315{
3316 if (**data < 0x80) {
3317 return *(*data)++;
3318 }
3319
3320 return lxb_encoding_single_index_windows_1258[*(*data)++ - 0x80].codepoint;
3321}
3322
3325 const lxb_char_t **data, const lxb_char_t *end)
3326{
3327 if (**data < 0x80) {
3328 return *(*data)++;
3329 }
3330
3331 return lxb_encoding_single_index_windows_874[*(*data)++ - 0x80].codepoint;
3332}
3333
3336 const lxb_char_t **data, const lxb_char_t *end)
3337{
3338 if (**data < 0x80) {
3339 return *(*data)++;
3340 }
3341
3342 return lxb_encoding_single_index_x_mac_cyrillic[*(*data)++ - 0x80].codepoint;
3343}
3344
/*
 * Decode a single x-user-defined byte.
 *
 * Reads one byte from *data and advances *data past it. Bytes below 0x80
 * are returned unchanged; any other byte is returned as
 * 0xF780 + (byte - 0x80), so no lookup table is involved.
 *
 * ctx and end are not used, and *data is read without a check against end.
 */
lxb_codepoint_t
lxb_encoding_decode_x_user_defined_single(lxb_encoding_decode_t *ctx,
                                          const lxb_char_t **data,
                                          const lxb_char_t *end)
{
    const lxb_char_t byte = *(*data)++;

    if (byte < 0x80) {
        return byte;
    }

    return 0xF780 + byte - 0x80;
}
LXB_API const lxb_encoding_multi_index_t lxb_encoding_multi_index_big5[19782]
Definition big5.c:18
@ LXB_STATUS_SMALL_BUFFER
Definition base.h:64
@ LXB_STATUS_CONTINUE
Definition base.h:63
@ LXB_STATUS_OK
Definition base.h:49
@ LXB_STATUS_ERROR
Definition base.h:50
lxb_codepoint_t lxb_encoding_decode_auto_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1940
lxb_status_t lxb_encoding_decode_windows_1256(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1869
lxb_codepoint_t lxb_encoding_decode_utf_16be_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2766
lxb_codepoint_t lxb_encoding_decode_utf_16le_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2773
lxb_status_t lxb_encoding_decode_iso_2022_jp(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:621
lxb_status_t lxb_encoding_decode_iso_8859_5(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1021
lxb_status_t lxb_encoding_decode_windows_1252(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1833
lxb_status_t lxb_encoding_decode_undefined(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:198
lxb_status_t lxb_encoding_decode_macintosh(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1798
lxb_status_t lxb_encoding_decode_windows_1250(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1815
lxb_codepoint_t lxb_encoding_decode_koi8_r_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2589
lxb_status_t lxb_encoding_decode_ibm866(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:612
#define LXB_ENCODING_DECODE_ERROR_END()
Definition decode.c:93
lxb_codepoint_t lxb_encoding_decode_koi8_u_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2600
#define LXB_ENCODING_DECODE_ISO_2022_JP_CONTINUE()
lxb_codepoint_t lxb_encoding_decode_windows_1257_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3302
lxb_status_t lxb_encoding_decode_iso_8859_10(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:949
lxb_status_t lxb_encoding_decode_iso_8859_3(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1003
lxb_codepoint_t lxb_encoding_decode_default_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1933
lxb_status_t lxb_encoding_decode_utf_16be(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1314
lxb_codepoint_t lxb_encoding_decode_windows_874_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3324
uint8_t lxb_encoding_decode_utf_8_length(lxb_char_t data)
Definition decode.c:3030
lxb_status_t lxb_encoding_decode_iso_8859_16(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:985
lxb_status_t lxb_encoding_decode_iso_8859_6(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1030
lxb_status_t lxb_encoding_decode_x_user_defined(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1914
lxb_status_t lxb_encoding_decode_big5(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:206
lxb_codepoint_t lxb_encoding_decode_windows_1255_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3280
lxb_codepoint_t lxb_encoding_decode_windows_1250_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3225
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET(first, two, f_lower, s_upper)
Definition decode.c:36
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SET_SINGLE(first, two, f_lower, s_upper)
Definition decode.c:167
lxb_status_t lxb_encoding_decode_gb18030(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1546
lxb_codepoint_t lxb_encoding_decode_iso_8859_5_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2534
lxb_status_t lxb_encoding_decode_default(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:183
lxb_inline lxb_status_t lxb_encoding_decode_utf_16(lxb_encoding_decode_t *ctx, bool is_be, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1204
lxb_status_t lxb_encoding_decode_iso_8859_7(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1039
lxb_status_t lxb_encoding_decode_iso_8859_13(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:958
lxb_status_t lxb_encoding_decode_utf_16le(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1321
lxb_status_t lxb_encoding_decode_iso_8859_8(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1048
#define LXB_ENCODING_DECODE_SINGLE(decode_map)
Definition decode.c:127
lxb_codepoint_t lxb_encoding_decode_iso_8859_7_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2556
lxb_status_t lxb_encoding_decode_iso_8859_8_i(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1057
lxb_status_t lxb_encoding_decode_euc_jp(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:372
lxb_codepoint_t lxb_encoding_decode_iso_8859_8_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2567
lxb_status_t lxb_encoding_decode_koi8_r(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1066
lxb_status_t lxb_encoding_decode_windows_1257(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1878
lxb_codepoint_t lxb_encoding_decode_iso_8859_3_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2512
lxb_codepoint_t lxb_encoding_decode_x_user_defined_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3346
lxb_status_t lxb_encoding_decode_x_mac_cyrillic(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1905
lxb_status_t lxb_encoding_decode_gbk(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:605
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY(_lower, _upper, _cont)
Definition decode.c:13
lxb_codepoint_t lxb_encoding_decode_iso_8859_13_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2457
#define LXB_ENCODING_DECODE_UTF_8_BOUNDARY_SINGLE(lower, upper)
Definition decode.c:153
#define LXB_ENCODING_DECODE_FAILED(ident)
Definition decode.c:113
lxb_codepoint_t lxb_encoding_decode_euc_jp_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2062
lxb_status_t lxb_encoding_decode_shift_jis(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1084
lxb_status_t lxb_encoding_decode_iso_8859_4(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1012
#define LXB_ENCODING_DECODE_ERROR(ctx)
Definition decode.c:106
lxb_codepoint_t lxb_encoding_decode_windows_1256_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3291
lxb_codepoint_t lxb_encoding_decode_macintosh_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3207
lxb_status_t lxb_encoding_decode_koi8_u(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1075
lxb_codepoint_t lxb_encoding_decode_iso_8859_2_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2501
lxb_status_t lxb_encoding_decode_iso_8859_15(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:976
lxb_status_t lxb_encoding_decode_utf_8(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1328
lxb_codepoint_t lxb_encoding_decode_undefined_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1947
lxb_codepoint_t lxb_encoding_decode_euc_kr_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2170
lxb_codepoint_t lxb_encoding_decode_shift_jis_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2611
lxb_status_t lxb_encoding_decode_windows_1258(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1887
lxb_codepoint_t lxb_encoding_decode_iso_8859_6_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2545
lxb_codepoint_t lxb_encoding_decode_valid_utf_8_single(const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2889
lxb_codepoint_t lxb_encoding_decode_windows_1252_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3247
#define LXB_ENCODING_DECODE_CHECK_OUT(ctx)
Definition decode.c:76
lxb_status_t lxb_encoding_decode_auto(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:190
lxb_codepoint_t lxb_encoding_decode_iso_8859_15_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2479
lxb_codepoint_t lxb_encoding_decode_x_mac_cyrillic_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3335
lxb_codepoint_t lxb_encoding_decode_iso_8859_10_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2446
#define LXB_ENCODING_DECODE_APPEND(ctx, cp)
Definition decode.c:55
lxb_status_t lxb_encoding_decode_iso_8859_2(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:994
lxb_inline lxb_codepoint_t lxb_encoding_decode_utf_16_single(lxb_encoding_decode_t *ctx, bool is_be, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2698
lxb_codepoint_t lxb_encoding_decode_iso_8859_16_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2490
#define LXB_ENCODING_DECODE_ERROR_BEGIN
Definition decode.c:84
lxb_codepoint_t lxb_encoding_decode_windows_1254_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3269
lxb_status_t lxb_encoding_decode_replacement(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1807
lxb_status_t lxb_encoding_decode_euc_kr(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:514
lxb_status_t lxb_encoding_decode_windows_1254(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1851
#define LXB_ENCODING_DECODE_ISO_2022_JP_OK()
lxb_codepoint_t lxb_encoding_decode_windows_1258_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3313
lxb_codepoint_t lxb_encoding_decode_windows_1253_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3258
lxb_status_t lxb_encoding_decode_iso_8859_14(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:967
lxb_inline lxb_codepoint_t lxb_encoding_decode_gb18030_range(uint32_t index)
Definition decode.c:1493
lxb_codepoint_t lxb_encoding_decode_gbk_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2231
lxb_codepoint_t lxb_encoding_decode_iso_8859_14_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2468
#define LXB_ENCODING_DECODE_APPEND_WO_CHECK(ctx, cp)
Definition decode.c:49
lxb_codepoint_t lxb_encoding_decode_windows_1251_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3236
lxb_codepoint_t lxb_encoding_decode_gb18030_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3050
lxb_codepoint_t lxb_encoding_decode_iso_2022_jp_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2249
lxb_codepoint_t lxb_encoding_decode_iso_8859_4_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2523
lxb_status_t lxb_encoding_decode_windows_874(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1896
lxb_status_t lxb_encoding_decode_windows_1255(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1860
lxb_codepoint_t lxb_encoding_decode_big5_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1954
lxb_codepoint_t lxb_encoding_decode_iso_8859_8_i_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2578
lxb_status_t lxb_encoding_decode_windows_1251(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1824
lxb_codepoint_t lxb_encoding_decode_ibm866_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2238
lxb_codepoint_t lxb_encoding_decode_utf_8_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:2780
lxb_codepoint_t lxb_encoding_decode_replacement_single(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:3218
lxb_status_t lxb_encoding_decode_windows_1253(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition decode.c:1842
lxb_codepoint_t lxb_encoding_decode_valid_utf_8_single_reverse(const lxb_char_t **end, const lxb_char_t *begin)
Definition decode.c:2959
int begin
Definition eaw_table.h:20
@ LXB_ENCODING_DECODE_CONTINUE
Definition base.h:54
@ LXB_ENCODING_ERROR_CODEPOINT
Definition base.h:42
lxb_encoding_single_index_t lxb_encoding_multi_index_t
Definition base.h:205
@ LXB_ENCODING_DECODE_2022_JP_TRAIL
Definition base.h:62
@ LXB_ENCODING_DECODE_2022_JP_ASCII
Definition base.h:58
@ LXB_ENCODING_DECODE_2022_JP_ESCAPE
Definition base.h:64
@ LXB_ENCODING_DECODE_2022_JP_UNSET
Definition base.h:65
@ LXB_ENCODING_DECODE_2022_JP_ROMAN
Definition base.h:59
@ LXB_ENCODING_DECODE_2022_JP_LEAD
Definition base.h:61
@ LXB_ENCODING_DECODE_2022_JP_KATAKANA
Definition base.h:60
@ LXB_ENCODING_DECODE_2022_JP_ESCAPE_START
Definition base.h:63
LXB_API const lxb_encoding_multi_index_t lxb_encoding_multi_index_euc_kr[23750]
Definition euc_kr.c:18
zend_long ch
Definition ffi.c:4580
memset(ptr, 0, type->size)
zend_long offset
LXB_API const lxb_encoding_multi_index_t lxb_encoding_multi_index_gb18030[23940]
Definition gb18030.c:18
LXB_API const lxb_encoding_multi_index_t lxb_encoding_multi_index_jis0208[11104]
Definition jis0208.c:18
LXB_API const lxb_encoding_multi_index_t lxb_encoding_multi_index_jis0212[7211]
Definition jis0212.c:18
lu_byte right
Definition minilua.c:4267
lu_byte left
Definition minilua.c:4266
#define next(ls)
Definition minilua.c:2661
unsigned const char * end
Definition php_ffi.h:51
zend_constant * data
LXB_API const lxb_encoding_range_index_t lxb_encoding_range_index_gb18030[207]
Definition range.c:16
#define LXB_ENCODING_RANGE_INDEX_GB18030_SIZE
Definition range.h:24
p
Definition session.c:1105
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_14[128]
Definition single.c:414
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1255[128]
Definition single.c:2790
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_10[128]
Definition single.c:150
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_3[128]
Definition single.c:942
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_ibm866[128]
Definition single.c:18
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_macintosh[128]
Definition single.c:1998
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_6[128]
Definition single.c:1338
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_874[128]
Definition single.c:3318
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_5[128]
Definition single.c:1206
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1252[128]
Definition single.c:2394
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_2[128]
Definition single.c:810
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1253[128]
Definition single.c:2526
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1250[128]
Definition single.c:2130
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_koi8_r[128]
Definition single.c:1734
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_4[128]
Definition single.c:1074
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1257[128]
Definition single.c:3054
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_15[128]
Definition single.c:546
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1258[128]
Definition single.c:3186
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1254[128]
Definition single.c:2658
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_16[128]
Definition single.c:678
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_8[128]
Definition single.c:1602
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_13[128]
Definition single.c:282
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_koi8_u[128]
Definition single.c:1866
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1251[128]
Definition single.c:2262
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_iso_8859_7[128]
Definition single.c:1470
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_windows_1256[128]
Definition single.c:2922
LXB_API const lxb_encoding_single_index_t lxb_encoding_single_index_x_mac_cyrillic[128]
Definition single.c:3450
lxb_char_t upper
Definition base.h:77
lxb_char_t lower
Definition base.h:76
size_t buffer_length
Definition base.h:110
lxb_status_t status
Definition base.h:126
lxb_codepoint_t codepoint
Definition base.h:121
lxb_encoding_ctx_utf_8_t utf_8
Definition base.h:129
union lxb_encoding_decode_t::@302274252113053227061303304053361346350151303155 u
lxb_encoding_ctx_euc_jp_t euc_jp
Definition base.h:132
lxb_encoding_ctx_2022_jp_t iso_2022_jp
Definition base.h:133
lxb_codepoint_t second_codepoint
Definition base.h:122
lxb_encoding_ctx_gb18030_t gb18030
Definition base.h:130
unsigned int lxb_status_t
Definition types.h:28
#define lxb_inline
Definition types.h:21
unsigned char lxb_char_t
Definition types.h:27
uint32_t lxb_codepoint_t
Definition types.h:26