php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
mbfilter_utf16.c
Go to the documentation of this file.
1/*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24/*
25 * The source code included in this file was separated from mbfilter.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30#include "zend_bitset.h"
31#include "mbfilter.h"
32#include "mbfilter_utf16.h"
33
34#ifdef ZEND_INTRIN_AVX2_NATIVE
35
36/* We are building an AVX2-only binary */
37# include <immintrin.h>
38# define mb_utf16be_to_wchar mb_utf16be_to_wchar_avx2
39# define mb_utf16le_to_wchar mb_utf16le_to_wchar_avx2
40# define mb_wchar_to_utf16be mb_wchar_to_utf16be_avx2
41# define mb_wchar_to_utf16le mb_wchar_to_utf16le_avx2
42
43static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
44static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
45static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
46static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
47
48#elif defined(ZEND_INTRIN_AVX2_RESOLVER)
49
50/* We are building a binary which works with or without AVX2; whether or not to use
51 * AVX2-accelerated functions will be determined at runtime */
52# include <immintrin.h>
53# include "Zend/zend_cpuinfo.h"
54
55static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
56static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
57static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
58static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
59
60# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
61/* Dynamic linker will decide whether or not to use AVX2-based functions and
62 * resolve symbols accordingly */
63
64ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
65ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));
66ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));
67ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));
68
69size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16be_wchar")));
70void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16be")));
71size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16le_wchar")));
72void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16le")));
73
76static mb_to_wchar_fn resolve_utf16be_wchar(void)
77{
78 return zend_cpu_supports_avx2() ? mb_utf16be_to_wchar_avx2 : mb_utf16be_to_wchar_default;
79}
80
83static mb_from_wchar_fn resolve_wchar_utf16be(void)
84{
85 return zend_cpu_supports_avx2() ? mb_wchar_to_utf16be_avx2 : mb_wchar_to_utf16be_default;
86}
87
90static mb_to_wchar_fn resolve_utf16le_wchar(void)
91{
92 return zend_cpu_supports_avx2() ? mb_utf16le_to_wchar_avx2 : mb_utf16le_to_wchar_default;
93}
94
97static mb_from_wchar_fn resolve_wchar_utf16le(void)
98{
99 return zend_cpu_supports_avx2() ? mb_wchar_to_utf16le_avx2 : mb_wchar_to_utf16le_default;
100}
101
102# else /* ZEND_INTRIN_AVX2_FUNC_PTR */
103/* We are compiling for a target where the dynamic linker will not be able to
104 * resolve symbols according to whether the host supports AVX2 or not; so instead,
105 * we can make calls go through a function pointer and set the function pointer
106 * on module load */
107
108#ifdef HAVE_FUNC_ATTRIBUTE_TARGET
109static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
110static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
111static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));
112static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));
113#else
114static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
115static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
116static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
117static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
118#endif
119
/* Dispatch targets for the four wrapper functions that follow. They start out
 * NULL and are assigned by init_convert_utf16(); the wrappers call through them
 * without a NULL check, so init_convert_utf16() must have run first. */
120static mb_to_wchar_fn utf16be_to_wchar_ptr = NULL;
121static mb_from_wchar_fn wchar_to_utf16be_ptr = NULL;
122static mb_to_wchar_fn utf16le_to_wchar_ptr = NULL;
123static mb_from_wchar_fn wchar_to_utf16le_ptr = NULL;
124
125static size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
126{
127 return utf16be_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
128}
129
130static void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
131{
132 wchar_to_utf16be_ptr(in, len, buf, end);
133}
134
135static size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
136{
137 return utf16le_to_wchar_ptr(in, in_len, buf, bufsize, NULL);
138}
139
140static void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
141{
142 wchar_to_utf16le_ptr(in, len, buf, end);
143}
144
145void init_convert_utf16(void)
146{
147 if (zend_cpu_supports_avx2()) {
148 utf16be_to_wchar_ptr = mb_utf16be_to_wchar_avx2;
149 wchar_to_utf16be_ptr = mb_wchar_to_utf16be_avx2;
150 utf16le_to_wchar_ptr = mb_utf16le_to_wchar_avx2;
151 wchar_to_utf16le_ptr = mb_wchar_to_utf16le_avx2;
152 } else {
153 utf16be_to_wchar_ptr = mb_utf16be_to_wchar_default;
154 wchar_to_utf16be_ptr = mb_wchar_to_utf16be_default;
155 utf16le_to_wchar_ptr = mb_utf16le_to_wchar_default;
156 wchar_to_utf16le_ptr = mb_wchar_to_utf16le_default;
157 }
158}
159# endif
160
161#else
162
163/* No AVX2 support */
164# define mb_utf16be_to_wchar mb_utf16be_to_wchar_default
165# define mb_utf16le_to_wchar mb_utf16le_to_wchar_default
166# define mb_wchar_to_utf16be mb_wchar_to_utf16be_default
167# define mb_wchar_to_utf16le mb_wchar_to_utf16le_default
168
169static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
170static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
171static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
172static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
173
174#endif
175
176static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter);
177static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
178static zend_string* mb_cut_utf16(unsigned char *str, size_t from, size_t len, unsigned char *end);
179static zend_string* mb_cut_utf16be(unsigned char *str, size_t from, size_t len, unsigned char *end);
180static zend_string* mb_cut_utf16le(unsigned char *str, size_t from, size_t len, unsigned char *end);
181
182static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
183
186 "UTF-16",
187 "UTF-16",
188 mbfl_encoding_utf16_aliases,
189 NULL,
190 0,
193 mb_utf16_to_wchar,
195 NULL,
196 mb_cut_utf16
197};
198
201 "UTF-16BE",
202 "UTF-16BE",
203 NULL,
204 NULL,
205 0,
210 NULL,
211 mb_cut_utf16be
212};
213
216 "UTF-16LE",
217 "UTF-16LE",
218 NULL,
219 NULL,
220 0,
225 NULL,
226 mb_cut_utf16le
227};
228
233 NULL,
235 mbfl_filt_conv_utf16_wchar_flush,
236 NULL,
237};
238
248
253 NULL,
255 mbfl_filt_conv_utf16_wchar_flush,
256 NULL,
257};
258
268
273 NULL,
275 mbfl_filt_conv_utf16_wchar_flush,
276 NULL,
277};
278
288
289#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
290
292{
293 /* Start with the assumption that the string is big-endian;
294 * If we find a little-endian BOM, then we will change that assumption */
295 if (filter->status == 0) {
296 filter->cache = c & 0xFF;
297 filter->status = 1;
298 } else {
299 int n = (filter->cache << 8) | (c & 0xFF);
300 filter->cache = filter->status = 0;
301 if (n == 0xFFFE) {
302 /* Switch to little-endian mode */
304 } else {
306 if (n >= 0xD800 && n <= 0xDBFF) {
307 filter->cache = n & 0x3FF; /* Pick out 10 data bits */
308 filter->status = 2;
309 return 0;
310 } else if (n >= 0xDC00 && n <= 0xDFFF) {
311 /* This is wrong; second part of surrogate pair has come first */
312 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
313 } else if (n != 0xFEFF) {
314 CK((*filter->output_function)(n, filter->data));
315 }
316 }
317 }
318
319 return 0;
320}
321
323{
324 int n;
325
326 switch (filter->status) {
327 case 0: /* First byte */
328 filter->cache = c & 0xFF;
329 filter->status = 1;
330 break;
331
332 case 1: /* Second byte */
333 n = (filter->cache << 8) | (c & 0xFF);
334 if (n >= 0xD800 && n <= 0xDBFF) {
335 filter->cache = n & 0x3FF; /* Pick out 10 data bits */
336 filter->status = 2;
337 } else if (n >= 0xDC00 && n <= 0xDFFF) {
338 /* This is wrong; second part of surrogate pair has come first */
339 filter->status = 0;
340 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
341 } else {
342 filter->status = 0;
343 CK((*filter->output_function)(n, filter->data));
344 }
345 break;
346
347 case 2: /* Second part of surrogate, first byte */
348 filter->cache = (filter->cache << 8) | (c & 0xFF);
349 filter->status = 3;
350 break;
351
352 case 3: /* Second part of surrogate, second byte */
353 n = ((filter->cache & 0xFF) << 8) | (c & 0xFF);
354 if (n >= 0xD800 && n <= 0xDBFF) {
355 /* Wrong; that's the first half of a surrogate pair, not the second */
356 filter->cache = n & 0x3FF;
357 filter->status = 2;
358 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
359 } else if (n >= 0xDC00 && n <= 0xDFFF) {
360 filter->status = 0;
361 n = ((filter->cache & 0x3FF00) << 2) + (n & 0x3FF) + 0x10000;
362 CK((*filter->output_function)(n, filter->data));
363 } else {
364 filter->status = 0;
365 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
366 CK((*filter->output_function)(n, filter->data));
367 }
368 }
369
370 return 0;
371}
372
374{
375 int n;
376
377 if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
378 CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
379 CK((*filter->output_function)(c & 0xff, filter->data));
380 } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
381 n = ((c >> 10) - 0x40) | 0xd800;
382 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
383 CK((*filter->output_function)(n & 0xff, filter->data));
384 n = (c & 0x3ff) | 0xdc00;
385 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
386 CK((*filter->output_function)(n & 0xff, filter->data));
387 } else {
389 }
390
391 return 0;
392}
393
395{
396 int n;
397
398 switch (filter->status) {
399 case 0:
400 filter->cache = c & 0xff;
401 filter->status = 1;
402 break;
403
404 case 1:
405 if ((c & 0xfc) == 0xd8) {
406 /* Looks like we have a surrogate pair here */
407 filter->cache += ((c & 0x3) << 8);
408 filter->status = 2;
409 } else if ((c & 0xfc) == 0xdc) {
410 /* This is wrong; the second part of the surrogate pair has come first */
411 filter->status = 0;
412 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
413 } else {
414 filter->status = 0;
415 CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
416 }
417 break;
418
419 case 2:
420 filter->cache = (filter->cache << 10) + (c & 0xff);
421 filter->status = 3;
422 break;
423
424 case 3:
425 n = (filter->cache & 0xFF) | ((c & 0xFF) << 8);
426 if (n >= 0xD800 && n <= 0xDBFF) {
427 /* We previously saw the first part of a surrogate pair and were
428 * expecting the second part; this is another first part */
429 filter->cache = n & 0x3FF;
430 filter->status = 2;
431 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
432 } else if (n >= 0xDC00 && n <= 0xDFFF) {
433 n = filter->cache + ((c & 0x3) << 8) + 0x10000;
434 filter->status = 0;
435 CK((*filter->output_function)(n, filter->data));
436 } else {
437 /* The first part of a surrogate pair was followed by some other codepoint
438 * which is not part of a surrogate pair at all */
439 filter->status = 0;
440 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
441 CK((*filter->output_function)(n, filter->data));
442 }
443 break;
444 }
445
446 return 0;
447}
448
450{
451 int n;
452
453 if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
454 CK((*filter->output_function)(c & 0xff, filter->data));
455 CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
456 } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
457 n = ((c >> 10) - 0x40) | 0xd800;
458 CK((*filter->output_function)(n & 0xff, filter->data));
459 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
460 n = (c & 0x3ff) | 0xdc00;
461 CK((*filter->output_function)(n & 0xff, filter->data));
462 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
463 } else {
465 }
466
467 return 0;
468}
469
470static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
471{
472 if (filter->status) {
473 /* Input string was truncated */
474 filter->status = 0;
475 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
476 }
477
478 if (filter->flush_function) {
479 (*filter->flush_function)(filter->data);
480 }
481
482 return 0;
483}
484
485#define DETECTED_BE 1
486#define DETECTED_LE 2
487
488static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
489{
490 if (*state == DETECTED_BE) {
491 return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
492 } else if (*state == DETECTED_LE) {
493 return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
494 } else if (*in_len >= 2) {
495 unsigned char *p = *in;
496 unsigned char c1 = *p++;
497 unsigned char c2 = *p++;
498 uint16_t n = (c1 << 8) | c2;
499
500 if (n == 0xFFFE) {
501 /* Little-endian BOM */
502 *in = p;
503 *in_len -= 2;
505 return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
506 } if (n == 0xFEFF) {
507 /* Big-endian BOM; don't send to output */
508 *in = p;
509 *in_len -= 2;
510 }
511 }
512
514 return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
515}
516
517static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
518{
519 /* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
520 unsigned char *p = *in, *e = p + (*in_len & ~1);
521 /* Set `limit` to one less than the actual amount of space in the buffer; this is because
522 * on some iterations of the below loop, we might produce two output words */
523 uint32_t *out = buf, *limit = buf + bufsize - 1;
524
525 while (p < e && out < limit) {
526 unsigned char c1 = *p++;
527 unsigned char c2 = *p++;
528 uint16_t n = (c1 << 8) | c2;
529
530 if (n >= 0xD800 && n <= 0xDBFF) {
531 /* Handle surrogate */
532 if (p < e) {
533 unsigned char c3 = *p++;
534 unsigned char c4 = *p++;
535 uint16_t n2 = (c3 << 8) | c4;
536
537 if (n2 >= 0xD800 && n2 <= 0xDBFF) {
538 /* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
539 *out++ = MBFL_BAD_INPUT;
540 p -= 2;
541 } else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
542 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
543 } else {
544 /* The first half of a surrogate pair was followed by a 'normal' codepoint */
545 *out++ = MBFL_BAD_INPUT;
546 *out++ = n2;
547 }
548 } else {
549 *out++ = MBFL_BAD_INPUT;
550 }
551 } else if (n >= 0xDC00 && n <= 0xDFFF) {
552 /* This is wrong; second part of surrogate pair has come first */
553 *out++ = MBFL_BAD_INPUT;
554 } else {
555 *out++ = n;
556 }
557 }
558
559 if (p == e && (*in_len & 0x1) && out < limit) {
560 /* There is an extra trailing byte (which shouldn't be there) */
561 *out++ = MBFL_BAD_INPUT;
562 p++;
563 }
564
565 *in_len -= (p - *in);
566 *in = p;
567 return out - buf;
568}
569
/* Portable wchar -> UTF-16BE encoder.
 *
 * Consumes `len` wchars from `in` and appends their big-endian UTF-16 encoding
 * to `buf`: two bytes for values below MBFL_WCSPLANE_UCS2MAX, a four-byte
 * surrogate pair for other values below MBFL_WCSPLANE_UTF32MAX, and
 * MB_CONVERT_ERROR handling for everything else.
 * `end` is not referenced in this function. */
570static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
571{
572 unsigned char *out, *limit;
573 MB_CONVERT_BUF_LOAD(buf, out, limit);
 /* Initial reservation: two output bytes per input wchar */
574 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
575
576 while (len--) {
577 uint32_t w = *in++;
578
579 if (w < MBFL_WCSPLANE_UCS2MAX) {
580 out = mb_convert_buf_add2(out, (w >> 8) & 0xFF, w & 0xFF);
581 } else if (w < MBFL_WCSPLANE_UTF32MAX) {
 /* Subtracting 0x40 after the shift by 10 is the same as subtracting
  * 0x10000 from `w` before taking its upper 10 bits */
582 uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
583 uint16_t n2 = (w & 0x3FF) | 0xDC00;
 /* A surrogate pair takes 4 bytes, 2 more than were budgeted for this wchar;
  * `len` has already been decremented, so it counts only the wchars still to come */
584 MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
585 out = mb_convert_buf_add4(out, (n1 >> 8) & 0xFF, n1 & 0xFF, (n2 >> 8) & 0xFF, n2 & 0xFF);
586 } else {
587 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16be_default);
 /* Re-reserve space for the remaining wchars after error handling
  * (presumably because the error path may itself have written output - TODO confirm) */
588 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
589 }
590 }
591
 /* NOTE(review): `out`/`limit` are loaded from `buf` at the top, but nothing visible
  * here writes them back. The embedded listing numbers jump from 591 to 593, so a
  * line (most likely the store-back into `buf`) appears to have been lost in
  * extraction - verify against the upstream file. */
593}
594
595static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
596{
597 /* We only want to read 16-bit words out of `str`; any trailing byte will be handled at the end */
598 unsigned char *p = *in, *e = p + (*in_len & ~1);
599 /* Set `limit` to one less than the actual amount of space in the buffer; this is because
600 * on some iterations of the below loop, we might produce two output words */
601 uint32_t *out = buf, *limit = buf + bufsize - 1;
602
603 while (p < e && out < limit) {
604 unsigned char c1 = *p++;
605 unsigned char c2 = *p++;
606 uint16_t n = (c2 << 8) | c1;
607
608 if (n >= 0xD800 && n <= 0xDBFF) {
609 /* Handle surrogate */
610 if (p < e) {
611 unsigned char c3 = *p++;
612 unsigned char c4 = *p++;
613 uint16_t n2 = (c4 << 8) | c3;
614
615 if (n2 >= 0xD800 && n2 <= 0xDBFF) {
616 /* Wrong; that's the first half of a surrogate pair, when we were expecting the second */
617 *out++ = MBFL_BAD_INPUT;
618 p -= 2;
619 } else if (n2 >= 0xDC00 && n2 <= 0xDFFF) {
620 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
621 } else {
622 /* The first half of a surrogate pair was followed by a 'normal' codepoint */
623 *out++ = MBFL_BAD_INPUT;
624 *out++ = n2;
625 }
626 } else {
627 *out++ = MBFL_BAD_INPUT;
628 }
629 } else if (n >= 0xDC00 && n <= 0xDFFF) {
630 /* This is wrong; second part of surrogate pair has come first */
631 *out++ = MBFL_BAD_INPUT;
632 } else {
633 *out++ = n;
634 }
635 }
636
637 if (p == e && (*in_len & 0x1) && out < limit) {
638 /* There is an extra trailing byte (which shouldn't be there) */
639 *out++ = MBFL_BAD_INPUT;
640 p++;
641 }
642
643 *in_len -= (p - *in);
644 *in = p;
645 return out - buf;
646}
647
/* Portable wchar -> UTF-16LE encoder.
 *
 * Consumes `len` wchars from `in` and appends their little-endian UTF-16
 * encoding to `buf`: two bytes for values below MBFL_WCSPLANE_UCS2MAX, a
 * four-byte surrogate pair for other values below MBFL_WCSPLANE_UTF32MAX, and
 * MB_CONVERT_ERROR handling for everything else.
 * `end` is not referenced in this function. */
648static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
649{
650 unsigned char *out, *limit;
651 MB_CONVERT_BUF_LOAD(buf, out, limit);
 /* Initial reservation: two output bytes per input wchar */
652 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
653
654 while (len--) {
655 uint32_t w = *in++;
656
657 if (w < MBFL_WCSPLANE_UCS2MAX) {
 /* Low byte first: little-endian output */
658 out = mb_convert_buf_add2(out, w & 0xFF, (w >> 8) & 0xFF);
659 } else if (w < MBFL_WCSPLANE_UTF32MAX) {
 /* Subtracting 0x40 after the shift by 10 is the same as subtracting
  * 0x10000 from `w` before taking its upper 10 bits */
660 uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
661 uint16_t n2 = (w & 0x3FF) | 0xDC00;
 /* A surrogate pair takes 4 bytes, 2 more than were budgeted for this wchar;
  * `len` has already been decremented, so it counts only the wchars still to come */
662 MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
663 out = mb_convert_buf_add4(out, n1 & 0xFF, (n1 >> 8) & 0xFF, n2 & 0xFF, (n2 >> 8) & 0xFF);
664 } else {
665 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16le_default);
 /* Re-reserve space for the remaining wchars after error handling
  * (presumably because the error path may itself have written output - TODO confirm) */
666 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
667 }
668 }
669
 /* NOTE(review): `out`/`limit` are loaded from `buf` at the top, but nothing visible
  * here writes them back. The embedded listing numbers jump from 669 to 671, so a
  * line (most likely the store-back into `buf`) appears to have been lost in
  * extraction - verify against the upstream file. */
671}
672
673#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
674
/* AVX2-accelerated UTF-16BE -> wchar decoder.
 *
 * While at least 32 input bytes remain and the output buffer has room for at
 * least 16 wchars, input is examined one 32-byte block (16 code units) at a time:
 *   - a block with no surrogates is byte-swapped and widened with vector code;
 *   - a block that starts with non-surrogates has that prefix converted with
 *     vector code;
 *   - a block that starts with surrogates has that run handled by scalar code.
 * Whatever is left over is passed to mb_utf16be_to_wchar_default().
 *
 * `*in` / `*in_len` are updated to reflect the bytes consumed; the return value
 * is the number of wchars written to `buf`. `state` is not used.
 * With ZEND_INTRIN_AVX2_FUNC_PROTO defined the function has external linkage;
 * otherwise it is static. */
675#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
676size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
677#else
678static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
679#endif
680{
681 size_t len = *in_len;
682
683 if (len >= 32 && bufsize >= 16) {
684 unsigned char *p = *in;
685 uint32_t *out = buf;
686
687 /* Used to determine if a block of input bytes contains any surrogates */
 /* The first byte of each big-endian code unit is its high byte; once loaded into a
  * little-endian 16-bit lane that byte occupies the low 8 bits, hence the masks are
  * 0xF8 / 0xD8 here (the little-endian decoder uses 0xF800 / 0xD800) */
688 const __m256i _f8 = _mm256_set1_epi16(0xF8);
689 const __m256i _d8 = _mm256_set1_epi16(0xD8);
690 /* wchars must be in host byte order, which is little-endian on x86;
691 * Since we are reading in (big-endian) UTF-16BE, use this vector to swap byte order for output */
692 const __m256i swap_bytes = _mm256_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
693
694 do {
695 __m256i operand = _mm256_loadu_si256((__m256i*)p); /* Load 32 bytes */
696
 /* One mask bit per byte, so every 16-bit code unit contributes two adjacent bits;
  * both are set when that unit is a surrogate (0xD800..0xDFFF) */
697 uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
698 if (surrogate_bitvec == 0) {
699 /* There are no surrogates among these 16 characters
700 * So converting the UTF-16 input to wchars is very simple; just extend each 16-bit value
701 * to a 32-bit value, filling in zero bits in the high end */
702 operand = _mm256_shuffle_epi8(operand, swap_bytes);
703 _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
704 _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
705 out += 16;
706 bufsize -= 16;
707 p += sizeof(__m256i);
708 len -= sizeof(__m256i);
709 } else if ((surrogate_bitvec & 1) == 0) {
710 /* Some prefix of the current block is non-surrogates; output those */
 /* Trailing zero bits / 2 = number of leading non-surrogate code units */
711 uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
712 operand = _mm256_shuffle_epi8(operand, swap_bytes);
713 /* We know that the output buffer has at least 64 bytes of space available
714 * So don't bother trimming the output down to only include the non-surrogate prefix;
715 * rather, write out an entire block of 64 (or 32) bytes, then bump our output pointer
716 * forward just past the 'good part', so the 'bad part' will be overwritten on the next
717 * iteration of this loop */
718 _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
719 if (n_chars > 8) {
720 _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
721 }
722 out += n_chars;
723 bufsize -= n_chars;
724 p += n_chars * 2;
725 len -= n_chars * 2;
726 } else {
727 /* Some prefix of the current block is (valid or invalid) surrogates
728 * Handle those using non-vectorized code */
729 surrogate_bitvec = ~surrogate_bitvec;
 /* After inversion, trailing zero bits / 2 = length of the leading surrogate run;
  * an all-zero inverted mask means all 16 code units are surrogates */
730 unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
731 do {
732 unsigned char c1 = *p++;
733 unsigned char c2 = *p++;
734
 /* Every unit in this run is a surrogate, so bit 0x4 of its high byte alone
  * distinguishes a low surrogate (0xDC..0xDF) from a high one (0xD8..0xDB) */
735 if (c1 & 0x4 || len < 4) {
736 /* 2nd part of surrogate pair has come first OR string ended abruptly
737 * after 1st part of surrogate pair */
738 *out++ = MBFL_BAD_INPUT;
739 bufsize--;
740 n_chars--;
741 len -= 2;
742 continue;
743 }
744
745 uint16_t n = (c1 << 8) | c2;
746 unsigned char c3 = *p++;
747 unsigned char c4 = *p++;
748
749 if ((c3 & 0xFC) == 0xDC) {
750 /* Valid surrogate pair */
751 uint16_t n2 = (c3 << 8) | c4;
752 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
753 bufsize--;
754 len -= 4;
755#ifdef PHP_HAVE_BUILTIN_USUB_OVERFLOW
756 /* Subtracting 2 from `n_chars` will automatically set the CPU's flags;
757 * branch directly off the appropriate flag (CF on x86) rather than using
758 * another instruction (CMP on x86) to check for underflow */
759 if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
760 /* The last 2 bytes of this block and the first 2 bytes of the following
761 * block form a valid surrogate pair; now just make sure we don't get
762 * stuck in this loop due to underflow of the loop index */
763 break;
764 }
765#else
 /* Same underflow check without the builtin: 1 - 2 wraps to UINT_MAX */
766 n_chars -= 2;
767 if (n_chars == UINT_MAX) {
768 break;
769 }
770#endif
771 } else {
772 /* First half of surrogate pair was followed by another first half
773 * OR by a non-surrogate character */
774 *out++ = MBFL_BAD_INPUT;
775 bufsize--;
776 n_chars--;
777 len -= 2;
778 p -= 2; /* Back up so the last 2 bytes will be processed again */
779 }
780 } while (n_chars);
781 }
782 } while (len >= 32 && bufsize >= 16);
783
 /* The scalar decoder is only entered when at least 4 output slots remain; otherwise
  * the leftover input stays unconsumed and is reported back through *in_len */
784 if (len && bufsize >= 4) {
785 /* Finish up trailing bytes which don't fill a 32-byte block */
786 out += mb_utf16be_to_wchar_default(&p, &len, out, bufsize, NULL);
787 }
788
789 *in = p;
790 *in_len = len;
791 return out - buf;
792 } else if (len) {
 /* Input or output too small for even one vector iteration */
793 return mb_utf16be_to_wchar_default(in, in_len, buf, bufsize, NULL);
794 } else {
795 return 0;
796 }
797}
798
/* AVX2-accelerated wchar -> UTF-16BE encoder.
 *
 * While at least 8 wchars remain, they are examined 8 at a time (one 32-byte load):
 *   - if all 8 fit in 16 bits, they are packed and byte-swapped with vector code;
 *   - if the block starts with values that fit in 16 bits, that prefix is
 *     converted with vector code;
 *   - otherwise the leading run of larger values is handled by scalar code, which
 *     emits a surrogate pair or reports an error through MB_CONVERT_ERROR.
 * Fewer than 8 remaining wchars are passed to mb_wchar_to_utf16be_default().
 * With ZEND_INTRIN_AVX2_FUNC_PROTO defined the function has external linkage;
 * otherwise it is static. */
799#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
800void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
801#else
802static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
803#endif
804{
805 if (len >= 8) {
806 unsigned char *out, *limit;
807 MB_CONVERT_BUF_LOAD(buf, out, limit);
808 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
809
810 /* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
811 const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
812 /* Used to extract 16 bits which we want from each of eight 32-bit values */
 /* Within each pair the indices are ordered (.., 0, 1), i.e. the two low bytes of
  * every wchar are emitted high byte first, giving big-endian output */
813 const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 13, 8, 9, 4, 5, 0, 1, 12, 13, 8, 9, 4, 5, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1);
814
815 do {
816 __m256i operand = _mm256_loadu_si256((__m256i*)in); /* Load 32 bytes */
817
 /* One mask bit per byte, so every 32-bit wchar contributes four adjacent bits;
  * all four are set when the wchar is unchanged by masking with 0xFFFF */
818 uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
819 if (bmp_bitvec == 0xFFFFFFFF) {
820 /* All eight wchars are in the BMP
821 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
822 * (which is equivalent to an XMM register) */
823 operand = _mm256_shuffle_epi8(operand, pack_8x16);
824 __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
825 operand = _mm256_alignr_epi8(operand2, operand, 8);
826 _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand)); /* Store 16 bytes */
827 out += 16;
828 len -= 8;
829 in += 8;
830 } else if (bmp_bitvec & 1) {
831 /* Some prefix of this block are codepoints in the BMP */
 /* Count of trailing one bits = 4 * (number of leading BMP wchars) */
832 unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
833 operand = _mm256_shuffle_epi8(operand, pack_8x16);
834 __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
835 operand = _mm256_alignr_epi8(operand2, operand, 8);
836 /* Store 16 bytes, but bump output pointer forward just past the 'good part',
837 * so the 'bad part' will be overwritten on the next iteration of this loop */
838 _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
 /* n_bytes / 2 = output bytes produced; n_bytes / 4 = wchars consumed */
839 out += n_bytes >> 1;
840 len -= n_bytes >> 2;
841 in += n_bytes >> 2;
842 } else {
843 /* Some prefix of this block is codepoints outside the BMP OR error markers
844 * Handle them using non-vectorized code */
 /* Trailing zero bits / 4 = length of the leading non-BMP run;
  * a zero mask means none of the 8 wchars is in the BMP */
845 unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
846 do {
847 uint32_t w = *in++;
848 n_words--;
849 len--;
850
851 if (w < MBFL_WCSPLANE_UTF32MAX) {
852 uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
853 uint16_t n2 = (w & 0x3FF) | 0xDC00;
 /* 4 bytes for this pair plus 2 per wchar still to come */
854 MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
855 out = mb_convert_buf_add4(out, (n1 >> 8) & 0xFF, n1 & 0xFF, (n2 >> 8) & 0xFF, n2 & 0xFF);
856 } else {
 /* Note that the function name handed to MB_CONVERT_ERROR is the scalar
  * encoder, not this AVX2 function */
857 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16be_default);
858 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
859 }
860 } while (n_words);
861 }
862 } while (len >= 8);
863
 /* NOTE(review): nothing visible here writes `out`/`limit` back into `buf` before the
  * scalar tail call below re-loads them. The embedded listing numbers jump from 863
  * to 865, so a line (most likely the store-back) appears to have been lost in
  * extraction - verify against the upstream file. */
865 }
866
 /* Fewer than 8 wchars left (or fewer than 8 supplied in the first place) */
867 if (len) {
868 mb_wchar_to_utf16be_default(in, len, buf, end);
869 }
870}
871
/* AVX2-accelerated UTF-16LE -> wchar decoder.
 *
 * While at least 32 input bytes remain and the output buffer has room for at
 * least 16 wchars, input is examined one 32-byte block (16 code units) at a time.
 * Surrogate-free stretches are widened to 32 bits with vector code (no byte swap
 * is performed in this little-endian variant); runs of surrogates are decoded by
 * scalar code. Whatever is left over is passed to mb_utf16le_to_wchar_default().
 *
 * `*in` / `*in_len` are updated to reflect the bytes consumed; the return value
 * is the number of wchars written to `buf`. `state` is not used.
 * With ZEND_INTRIN_AVX2_FUNC_PROTO defined the function has external linkage;
 * otherwise it is static. */
872#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
873size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
874#else
875static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
876#endif
877{
878 /* Most of this function is the same as `mb_utf16be_to_wchar_avx2`, above;
879 * See it for more detailed code comments */
880
881 size_t len = *in_len;
882
883 if (len >= 32 && bufsize >= 16) {
884 unsigned char *p = *in;
885 uint32_t *out = buf;
886
 /* A 16-bit value is a surrogate (0xD800..0xDFFF) exactly when its top five bits
  * equal those of 0xD800 */
887 const __m256i _f8 = _mm256_set1_epi16(0xF800);
888 const __m256i _d8 = _mm256_set1_epi16(0xD800);
889
890 do {
891 __m256i operand = _mm256_loadu_si256((__m256i*)p);
892
 /* One mask bit per byte, so every 16-bit code unit contributes two adjacent bits */
893 uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));
894 if (surrogate_bitvec == 0) {
895 /* There are no surrogates among these 16 characters */
896 _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
897 _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
898 out += 16;
899 bufsize -= 16;
900 p += sizeof(__m256i);
901 len -= sizeof(__m256i);
902 } else if ((surrogate_bitvec & 1) == 0) {
903 /* Some prefix of the current block is non-surrogates */
 /* Trailing zero bits / 2 = number of leading non-surrogate code units */
904 uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;
 /* Whole vectors are stored; only `n_chars` outputs are kept by the pointer bump below,
  * so the excess is overwritten by later output */
905 _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));
906 if (n_chars > 8) {
907 _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));
908 }
909 out += n_chars;
910 bufsize -= n_chars;
911 p += n_chars * 2;
912 len -= n_chars * 2;
913 } else {
914 /* Some prefix of the current block is (valid or invalid) surrogates */
915 surrogate_bitvec = ~surrogate_bitvec;
 /* After inversion, trailing zero bits / 2 = length of the leading surrogate run;
  * an all-zero inverted mask means all 16 code units are surrogates */
916 unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;
917 do {
918 unsigned char c1 = *p++;
919 unsigned char c2 = *p++;
920
 /* Little-endian: the second byte is the high byte. Every unit in this run is a
  * surrogate, so bit 0x4 of the high byte alone marks a low surrogate */
921 if (c2 & 0x4 || len < 4) {
922 /* 2nd part of surrogate pair has come first OR string ended abruptly
923 * after 1st part of surrogate pair */
924 *out++ = MBFL_BAD_INPUT;
925 bufsize--;
926 n_chars--;
927 len -= 2;
928 continue;
929 }
930
931 uint16_t n = (c2 << 8) | c1;
932 unsigned char c3 = *p++;
933 unsigned char c4 = *p++;
934
935 if ((c4 & 0xFC) == 0xDC) {
936 /* Valid surrogate pair */
937 uint16_t n2 = (c4 << 8) | c3;
938 *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;
939 bufsize--;
940 len -= 4;
 /* A pair consumes two code units; if only one was left in this run, the
  * subtraction underflows and the loop must be left explicitly */
941#ifdef PHP_HAVE_BUILTIN_USUB_OVERFLOW
942 if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {
943 break;
944 }
945#else
946 n_chars -= 2;
947 if (n_chars == UINT_MAX) {
948 break;
949 }
950#endif
951 } else {
952 /* First half of surrogate pair was followed by another first half
953 * OR by a non-surrogate character */
954 *out++ = MBFL_BAD_INPUT;
955 bufsize--;
956 n_chars--;
957 len -= 2;
958 p -= 2; /* Back up so the last 2 bytes will be processed again */
959 }
960 } while (n_chars);
961 }
962 } while (len >= 32 && bufsize >= 16);
963
 /* Finish trailing input with the scalar decoder, but only when at least 4 output
  * slots remain; otherwise the leftover is reported back through *in_len */
964 if (len && bufsize >= 4) {
965 out += mb_utf16le_to_wchar_default(&p, &len, out, bufsize, NULL);
966 }
967
968 *in = p;
969 *in_len = len;
970 return out - buf;
971 } else if (len) {
 /* Input or output too small for even one vector iteration */
972 return mb_utf16le_to_wchar_default(in, in_len, buf, bufsize, NULL);
973 } else {
974 return 0;
975 }
976}
977
/*
 * AVX2 implementation of the wchar -> UTF-16LE encoder.
 *
 * in:  array of 32-bit wchars to encode
 * len: number of wchars in `in`
 * buf: conversion output buffer which receives the UTF-16LE bytes
 * end: not used by the vector loop; only forwarded to mb_wchar_to_utf16le_default
 *
 * Input is consumed eight wchars (one 256-bit load) at a time for as long as at
 * least eight remain. Whatever is left over (including inputs shorter than eight
 * wchars from the start) is handed to mb_wchar_to_utf16le_default.
 */
978 #ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
979void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
980 #else
981static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
982 #endif
983{
984 if (len >= 8) {
985 unsigned char *out, *limit;
986 MB_CONVERT_BUF_LOAD(buf, out, limit);
 /* Request 2 output bytes per input wchar. Since len >= 8 here this is at least 16,
  * which is the size of the unconditional 128-bit stores below.
  * NOTE(review): presumably the macro grows the buffer so that `needed` bytes fit
  * after `out`; its definition is not in this part of the file - confirm */
987 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
988
989 /* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */
990 const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);
991 /* Used to extract 16 bits which we want from each of eight 32-bit values */
 /* Within the low 128-bit lane the four extracted 16-bit values land in the upper 8 bytes;
  * within the high lane they land in the lower 8 bytes. The remaining bytes are zeroed (-1 index) */
992 const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 13, 12, 9, 8, 5, 4, 1, 0, 13, 12, 9, 8, 5, 4, 1, 0, -1, -1, -1, -1, -1, -1, -1, -1);
993
994 do {
995 __m256i operand = _mm256_loadu_si256((__m256i*)in);
996
 /* A 32-bit lane compares equal to itself masked with 0xFFFF only if its upper 16 bits
  * are zero. movemask_epi8 yields one bit per byte, so each wchar contributes 4 bits */
997 uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));
998 if (bmp_bitvec == 0xFFFFFFFF) {
999 /* All eight wchars are in the BMP
1000 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
1001 * (which is equivalent to an XMM register) */
1002 operand = _mm256_shuffle_epi8(operand, pack_8x16);
 /* Swap the two 128-bit lanes, then shift the lane pair right by 8 bytes so that the
  * packed halves of both lanes end up adjacent in the low 128 bits */
1003 __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
1004 operand = _mm256_alignr_epi8(operand2, operand, 8);
1005 _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
1006 out += 16;
1007 len -= 8;
1008 in += 8;
1009 } else if (bmp_bitvec & 1) {
1010 /* Some prefix of this block are codepoints in the BMP */
 /* Count of consecutive low 1 bits in bmp_bitvec: 4 per leading BMP wchar */
1011 unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);
1012 operand = _mm256_shuffle_epi8(operand, pack_8x16);
1013 __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);
1014 operand = _mm256_alignr_epi8(operand2, operand, 8);
 /* All 16 bytes are written, but `out` only advances past the BMP prefix, so the
  * bytes after the prefix are overwritten by later output */
1015 _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));
 /* n_bytes / 4 input wchars were consumed, each producing 2 output bytes */
1016 out += n_bytes >> 1;
1017 len -= n_bytes >> 2;
1018 in += n_bytes >> 2;
1019 } else {
1020 /* Some prefix of this block is codepoints outside the BMP OR error markers */
 /* Number of leading wchars with non-zero upper 16 bits; all 8 if no lane passed the BMP test */
1021 unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;
1022 do {
1023 uint32_t w = *in++;
1024 n_words--;
1025 len--;
1026
1027 if (w < MBFL_WCSPLANE_UTF32MAX) {
 /* Encode as a surrogate pair: n1 is the high (0xD800-based) unit,
  * n2 is the low (0xDC00-based) unit carrying the bottom 10 bits */
1028 uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;
1029 uint16_t n2 = (w & 0x3FF) | 0xDC00;
 /* 4 bytes for this pair plus 2 bytes for each wchar still to be processed */
1030 MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
 /* Low byte of each 16-bit unit is emitted first (little-endian) */
1031 out = mb_convert_buf_add4(out, n1 & 0xFF, (n1 >> 8) & 0xFF, n2 & 0xFF, (n2 >> 8) & 0xFF);
1032 } else {
 /* NOTE(review): presumably reports `w` as unencodable, using the scalar encoder
  * for any replacement output - macro body is not visible here, confirm */
1033 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16le_default);
1034 MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
1035 }
1036 } while (n_words);
1037 }
1038 } while (len >= 8);
1039
1040 MB_CONVERT_BUF_STORE(buf, out, limit);
1041 }
1042
 /* Fewer than 8 wchars remain (or were given to begin with): use the scalar encoder */
1043 if (len) {
1044 mb_wchar_to_utf16le_default(in, len, buf, end);
1045 }
1046}
1047
1048#endif /* defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) */
1049
1050static zend_string* mb_cut_utf16be(unsigned char *str, size_t from, size_t len, unsigned char *end)
1051{
1052 if (len > end - (str + from)) {
1053 len = end - (str + from);
1054 }
1055 from &= ~1;
1056 len &= ~1;
1057 unsigned char *start = str + from;
1058 if (len < 2 || (end - start) < 2) {
1059 return zend_empty_string;
1060 }
1061 /* Check if 1st codepoint is 2nd part of surrogate pair */
1062 if (from > 0) {
1063 uint32_t start_cp = (*start << 8) + *(start + 1);
1064 if (start_cp >= 0xDC00 && start_cp <= 0xDFFF) {
1065 uint32_t preceding_cp = (*(start - 2) << 8) + *(start - 1);
1066 if (preceding_cp >= 0xD800 && preceding_cp <= 0xDBFF) {
1067 from -= 2;
1068 }
1069 }
1070 }
1071 /* Same for ending cut point */
1072 unsigned char *_end = start + len;
1073 if (_end > end) {
1074 _end = end;
1075 }
1076 uint32_t ending_cp = (*(_end - 2) << 8) + *(_end - 1);
1077 if (ending_cp >= 0xD800 && ending_cp <= 0xDBFF) {
1078 _end -= 2;
1079 }
1080 return zend_string_init_fast((char*)start, _end - start);
1081}
1082
1083static zend_string* mb_cut_utf16le(unsigned char *str, size_t from, size_t len, unsigned char *end)
1084{
1085 if (len > end - (str + from)) {
1086 len = end - (str + from);
1087 }
1088 from &= ~1;
1089 len &= ~1;
1090 unsigned char *start = str + from;
1091 if (len < 2 || (end - start) < 2) {
1092 return zend_empty_string;
1093 }
1094 /* Check if 1st codepoint is 2nd part of surrogate pair */
1095 if (from > 0) {
1096 uint32_t start_cp = (*(start + 1) << 8) + *start;
1097 if (start_cp >= 0xDC00 && start_cp <= 0xDFFF) {
1098 uint32_t preceding_cp = (*(start - 1) << 8) + *(start - 2);
1099 if (preceding_cp >= 0xD800 && preceding_cp <= 0xDBFF) {
1100 from -= 2;
1101 }
1102 }
1103 }
1104 /* Same for ending cut point */
1105 unsigned char *_end = start + len;
1106 if (_end > end) {
1107 _end = end;
1108 }
1109 uint32_t ending_cp = (*(_end - 1) << 8) + *(_end - 2);
1110 if (ending_cp >= 0xD800 && ending_cp <= 0xDBFF) {
1111 _end -= 2;
1112 }
1113 return zend_string_init_fast((char*)start, _end - start);
1114}
1115
1116static zend_string* mb_cut_utf16(unsigned char *str, size_t from, size_t len, unsigned char *end)
1117{
1118 if (len < 2 || (end - str) < 2) {
1119 return zend_empty_string;
1120 }
1121 uint32_t cp = (*str << 8) + *(str + 1);
1122 if (cp == 0xFFFE) {
1123 /* Little-endian BOM */
1124 if (from < 2) {
1125 from = 2;
1126 }
1127 return mb_cut_utf16le(str, from, len, end);
1128 } else {
1129 if (cp == 0xFEFF && from < 2) {
1130 from = 2;
1131 }
1132 return mb_cut_utf16be(str, from, len, end);
1133 }
1134}
size_t len
Definition apprentice.c:174
zend_long n
Definition ffi.c:4979
buf start
Definition ffi.c:4687
zend_ffi_ctype_name_buf buf
Definition ffi.c:4685
#define __attribute__(a)
Definition file.h:131
#define NULL
Definition gdcache.h:45
#define DETECTED_BE
#define DETECTED_LE
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
#define mb_utf16be_to_wchar
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
const mbfl_encoding mbfl_encoding_utf16be
#define CK(statement)
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
const struct mbfl_convert_vtbl vtbl_utf16le_wchar
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
const mbfl_encoding mbfl_encoding_utf16le
const struct mbfl_convert_vtbl vtbl_utf16be_wchar
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
#define mb_wchar_to_utf16le
const mbfl_encoding mbfl_encoding_utf16
#define mb_utf16le_to_wchar
const struct mbfl_convert_vtbl vtbl_wchar_utf16be
const struct mbfl_convert_vtbl vtbl_wchar_utf16le
const struct mbfl_convert_vtbl vtbl_utf16_wchar
const struct mbfl_convert_vtbl vtbl_wchar_utf16
#define mb_wchar_to_utf16be
#define MBFL_WCSPLANE_UCS2MAX
Definition mbfl_consts.h:39
#define MBFL_WCSPLANE_SUPMAX
Definition mbfl_consts.h:42
#define MBFL_WCSPLANE_UTF32MAX
Definition mbfl_consts.h:40
#define MBFL_BAD_INPUT
Definition mbfl_consts.h:45
#define MBFL_WCSPLANE_SUPMIN
Definition mbfl_consts.h:41
int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
struct _mbfl_convert_filter mbfl_convert_filter
@ mbfl_no_encoding_utf16be
@ mbfl_no_encoding_utf16
@ mbfl_no_encoding_wchar
@ mbfl_no_encoding_utf16le
size_t(* mb_to_wchar_fn)(unsigned char **in, size_t *in_len, uint32_t *out, size_t out_len, unsigned int *state)
#define MB_CONVERT_BUF_STORE(buf, _out, _limit)
#define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed)
#define MB_CONVERT_ERROR(buf, out, limit, bad_cp, conv_fn)
void(* mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end)
#define MB_CONVERT_BUF_LOAD(buf, _out, _limit)
unsigned const char * end
Definition php_ffi.h:51
p
Definition session.c:1105
output_function_t output_function
int(* filter_function)(int c, mbfl_convert_filter *filter)
flush_function_t flush_function
enum mbfl_no_encoding from
#define ZEND_NO_SANITIZE_ADDRESS
struct _zend_string zend_string
#define ZEND_INTRIN_AVX2_FUNC_DECL(func)
#define ZEND_ATTRIBUTE_UNUSED
ZEND_API zend_string * zend_empty_string
Definition zend_string.c:51
out($f, $s)