/*
 * Encoding alphabet: the array index is a 6-bit value (0..63) and the entry
 * is the character emitted for it. Entries appear in the order 'A'..'Z',
 * 'a'..'z', '0'..'9', '+', '/', followed by one extra '\0' entry, so the
 * array has 65 elements and can also be read as a NUL-terminated string.
 */
24static const char base64_table[] = {
25 'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
26 'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z',
27 'a',
'b',
'c',
'd',
'e',
'f',
'g',
'h',
'i',
'j',
'k',
'l',
'm',
28 'n',
'o',
'p',
'q',
'r',
's',
't',
'u',
'v',
'w',
'x',
'y',
'z',
29 '0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9',
'+',
'/',
'\0'
/*
 * Padding character. The scalar decoder compares each input byte against
 * this value before consulting base64_reverse_table.
 */
32static const char base64_pad =
'=';
/*
 * Decoding table, indexed by the input byte value (0..255), 16 entries per
 * row:
 *   0..63  the 6-bit value of an alphabet character
 *          ('A'..'Z' -> 0..25, 'a'..'z' -> 26..51, '0'..'9' -> 52..61,
 *           '+' -> 62, '/' -> 63)
 *   -1     tab (9), line feed (10), carriage return (13) and space (32)
 *   -2     every other byte, including '=' (61) and all bytes >= 128
 *
 * NOTE(review): the decoder tests for -2 explicitly; how -1 entries are
 * treated is not visible in this excerpt - presumably they are skipped as
 * whitespace; confirm against php_base64_decode_impl.
 */
34static const short base64_reverse_table[256] = {
35 -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
36 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
37 -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
38 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
39 -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
40 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
41 -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
42 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
43 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
44 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
45 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
46 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
47 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
48 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
49 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
50 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
54#if defined(__aarch64__) || defined(_M_ARM64)
57static zend_always_inline uint8x16_t encode_toascii(
const uint8x16_t input,
const uint8x16x2_t shift_LUT)
63 uint8x16_t
result = vqsubq_u8(input, vdupq_n_u8(51));
67 const uint8x16_t less = vcgtq_u8(vdupq_n_u8(26), input);
68 result = vorrq_u8(
result, vandq_u8(less, vdupq_n_u8(13)));
71 return vaddq_u8(
result, input);
74static zend_always_inline unsigned char *neon_base64_encode(
const unsigned char *in,
size_t inl,
unsigned char *
out,
size_t *
left)
76 const uint8_t shift_LUT_[32] = {
'a' - 26,
'0' - 52,
'0' - 52,
'0' - 52,
77 '0' - 52,
'0' - 52,
'0' - 52,
'0' - 52,
78 '0' - 52,
'0' - 52,
'0' - 52,
'+' - 62,
80 'a' - 26,
'0' - 52,
'0' - 52,
'0' - 52,
81 '0' - 52,
'0' - 52,
'0' - 52,
'0' - 52,
82 '0' - 52,
'0' - 52,
'0' - 52,
'+' - 62,
84 const uint8x16x2_t shift_LUT = *((
const uint8x16x2_t *)shift_LUT_);
88 const uint8x16x3_t x = vld3q_u8((
const uint8_t *)(in));
91 const uint8x16_t field_a = vshrq_n_u8(x.val[0], 2);
93 const uint8x16_t field_b =
94 vbslq_u8(vdupq_n_u8(0x30),
95 vshlq_n_u8(x.val[0], 4),
96 vshrq_n_u8(x.val[1], 4));
98 const uint8x16_t field_c =
99 vbslq_u8(vdupq_n_u8(0x3c),
100 vshlq_n_u8(x.val[1], 2),
101 vshrq_n_u8(x.val[2], 6));
104 const uint8x16_t field_d = vandq_u8(x.val[2], vdupq_n_u8(0x3f));
107 result.val[0] = encode_toascii(field_a, shift_LUT);
108 result.val[1] = encode_toascii(field_b, shift_LUT);
109 result.val[2] = encode_toascii(field_c, shift_LUT);
110 result.val[3] = encode_toascii(field_d, shift_LUT);
116 }
while (inl >= 16 * 3);
125#if defined(__aarch64__) || defined(_M_ARM64)
128 out = neon_base64_encode(in, inl,
out, &
left);
135 *
out++ = base64_table[in[0] >> 2];
136 *
out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
137 *
out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)];
138 *
out++ = base64_table[in[2] & 0x3f];
146 *
out++ = base64_table[in[0] >> 2];
148 *
out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
149 *
out++ = base64_table[(in[1] & 0x0f) << 2];
154 *
out++ = base64_table[(in[0] & 0x03) << 4];
168#if defined(__aarch64__) || defined(_M_ARM64)
/*
 * Translate 16 ASCII bytes to their decoded values in one NEON register and
 * report, per lane, whether the byte failed the table-based validity test.
 *
 * input      16 input bytes.
 * error      out: lane-wise result of (M & bit) == 0, i.e. all-ones in each
 *            lane where the mask/bit-position test found no common bit
 *            (presumably a byte outside the alphabet - confirm against the
 *            maskLUT contents).
 * shiftLUT   per-high-nibble offset added to the input byte.
 * maskLUT    per-low-nibble bit mask used for validation.
 * bitposLUT  per-high-nibble single-bit value used for validation.
 *
 * Returns input + shift, lane-wise.
 */
169static zend_always_inline uint8x16_t decode_fromascii(
const uint8x16_t input, uint8x16_t *
error,
const uint8x16x2_t shiftLUT,
const uint8x16x2_t maskLUT,
const uint8x16x2_t bitposLUT) {
/* Split every byte into its two 4-bit halves; both are in 0..15 and are
 * used as table indices below. */
170 const uint8x16_t higher_nibble = vshrq_n_u8(input, 4);
171 const uint8x16_t lower_nibble = vandq_u8(input, vdupq_n_u8(0x0f));
/* Offset selected by the high nibble alone. */
172 const uint8x16_t sh = vqtbl2q_u8(shiftLUT, higher_nibble);
/* 0x2f is '/': it shares high nibble 2 with '+', so it cannot get its own
 * table entry and is given the fixed offset 16 instead (0x2f + 16 == 63). */
173 const uint8x16_t eq_2f = vceqq_u8(input, vdupq_n_u8(0x2f));
174 const uint8x16_t shift = vbslq_u8(eq_2f, vdupq_n_u8(16), sh);
/* Validation: low nibble picks a mask, high nibble picks a bit; a lane is
 * flagged in *error when the two have no bit in common. */
175 const uint8x16_t
M = vqtbl2q_u8(maskLUT, lower_nibble);
176 const uint8x16_t bit = vqtbl2q_u8(bitposLUT, higher_nibble);
177 *
error = vceqq_u8(vandq_u8(
M, bit), vdupq_n_u8(0));
/* The translated value is produced even for flagged lanes; the caller must
 * check *error before using it. */
178 return vaddq_u8(input, shift);
181static zend_always_inline size_t neon_base64_decode(
const unsigned char *in,
size_t inl,
unsigned char *
out,
size_t *
left) {
182 unsigned char *out_orig =
out;
183 const uint8_t shiftLUT_[32] = {
184 0, 0, 19, 4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
185 0, 0, 0, 0, 0, 0, 0, 0,
186 0, 0, 19, 4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
187 0, 0, 0, 0, 0, 0, 0, 0};
188 const uint8_t maskLUT_[32] = {
190 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
197 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
203 const uint8_t bitposLUT_[32] = {
204 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
205 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
207 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
208 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
210 const uint8x16x2_t shiftLUT = *((
const uint8x16x2_t *)shiftLUT_);
211 const uint8x16x2_t maskLUT = *((
const uint8x16x2_t *)maskLUT_);
212 const uint8x16x2_t bitposLUT = *((
const uint8x16x2_t *)bitposLUT_);;
215 const uint8x16x4_t x = vld4q_u8((
const unsigned char *)in);
220 uint8x16_t field_a = decode_fromascii(x.val[0], &error_a, shiftLUT, maskLUT, bitposLUT);
221 uint8x16_t field_b = decode_fromascii(x.val[1], &error_b, shiftLUT, maskLUT, bitposLUT);
222 uint8x16_t field_c = decode_fromascii(x.val[2], &error_c, shiftLUT, maskLUT, bitposLUT);
223 uint8x16_t field_d = decode_fromascii(x.val[3], &error_d, shiftLUT, maskLUT, bitposLUT);
225 const uint8x16_t
err = vorrq_u8(vorrq_u8(error_a, error_b), vorrq_u8(error_c, error_d));
226 union {uint8_t mem[16]; uint64_t dw[2]; }
error;
236 result.val[0] = vorrq_u8(vshrq_n_u8(field_b, 4), vshlq_n_u8(field_a, 2));
237 result.val[1] = vorrq_u8(vshrq_n_u8(field_c, 2), vshlq_n_u8(field_b, 4));
238 result.val[2] = vorrq_u8(field_d, vshlq_n_u8(field_c, 6));
244 }
while (inl >= 16 * 4);
246 return out - out_orig;
250static zend_always_inline int php_base64_decode_impl(
const unsigned char *in,
size_t inl,
unsigned char *
out,
size_t *outl,
bool strict)
253 size_t i = 0, padding = 0,
j = *outl;
255#if defined(__aarch64__) || defined(_M_ARM64)
258 j += neon_base64_decode(in, inl,
out, &
left);
268 if (
ch == base64_pad) {
273 ch = base64_reverse_table[
ch];
285 if (
ch == -2 || padding) {
296 out[
j] = (
ch & 0x0f) << 4;
300 out[
j] = (
ch & 0x03) << 6;
310 if (strict && i % 4 == 1) {
316 if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
332#ifdef ZEND_INTRIN_AVX2_NATIVE
333# undef ZEND_INTRIN_SSSE3_NATIVE
334# undef ZEND_INTRIN_SSSE3_RESOLVER
335# undef ZEND_INTRIN_SSSE3_FUNC_PROTO
336# undef ZEND_INTRIN_SSSE3_FUNC_PTR
337#elif defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_SSSE3_NATIVE)
338# undef ZEND_INTRIN_SSSE3_NATIVE
339# undef ZEND_INTRIN_SSSE3_RESOLVER
340# define ZEND_INTRIN_SSSE3_RESOLVER 1
341# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
342# undef ZEND_INTRIN_SSSE3_FUNC_DECL
343# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
344# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
346# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
348#elif defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_SSSE3_NATIVE)
349# undef ZEND_INTRIN_SSSE3_NATIVE
350# undef ZEND_INTRIN_SSSE3_RESOLVER
351# define ZEND_INTRIN_SSSE3_RESOLVER 1
352# define ZEND_INTRIN_SSSE3_FUNC_PTR 1
353# undef ZEND_INTRIN_SSSE3_FUNC_DECL
354# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
355# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
357# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
362#if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_FUNC_PROTO)
363#define BASE64_INTRIN_AVX512_FUNC_PROTO 1
365#if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_FUNC_PTR)
366#define BASE64_INTRIN_AVX512_FUNC_PTR 1
368#if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PROTO)
369#define BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO 1
371#if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PTR)
372#define BASE64_INTRIN_AVX512_VBMI_FUNC_PTR 1
375#ifdef ZEND_INTRIN_AVX2_NATIVE
376# include <immintrin.h>
377#elif defined(ZEND_INTRIN_SSSE3_NATIVE)
378# include <tmmintrin.h>
379#elif defined(ZEND_INTRIN_SSSE3_RESOLVER) || defined(ZEND_INTRIN_AVX2_RESOLVER)
380# ifdef ZEND_INTRIN_AVX2_RESOLVER
381# include <immintrin.h>
383# include <tmmintrin.h>
387# if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR)
391# if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR)
396# ifdef ZEND_INTRIN_AVX2_RESOLVER
401# ifdef ZEND_INTRIN_SSSE3_RESOLVER
407zend_string *php_base64_decode_ex_default(
const unsigned char *str,
size_t length,
bool strict);
409# if (defined(ZEND_INTRIN_AVX2_FUNC_PROTO) || defined(ZEND_INTRIN_SSSE3_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO))
414typedef zend_string *(*base64_decode_func_t)(
const unsigned char *, size_t, bool);
418static base64_encode_func_t resolve_base64_encode(
void) {
419# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO
420 if (zend_cpu_supports_avx512_vbmi()) {
421 return php_base64_encode_avx512_vbmi;
424# ifdef BASE64_INTRIN_AVX512_FUNC_PROTO
425 if (zend_cpu_supports_avx512()) {
426 return php_base64_encode_avx512;
429# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
430 if (zend_cpu_supports_avx2()) {
431 return php_base64_encode_avx2;
434#ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO
435 if (zend_cpu_supports_ssse3()) {
436 return php_base64_encode_ssse3;
439 return php_base64_encode_default;
444static base64_decode_func_t resolve_base64_decode(
void) {
445# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO
446 if (zend_cpu_supports_avx512_vbmi()) {
447 return php_base64_decode_ex_avx512_vbmi;
450# ifdef BASE64_INTRIN_AVX512_FUNC_PROTO
451 if (zend_cpu_supports_avx512()) {
452 return php_base64_decode_ex_avx512;
455# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
456 if (zend_cpu_supports_avx2()) {
457 return php_base64_decode_ex_avx2;
460#ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO
461 if (zend_cpu_supports_ssse3()) {
462 return php_base64_decode_ex_ssse3;
465 return php_base64_decode_ex_default;
470PHPAPI zend_string *(*php_base64_decode_ex_ptr)(
const unsigned char *str,
size_t length,
bool strict) =
NULL;
473 return php_base64_encode_ptr(str, length,
flags);
476 return php_base64_decode_ex_ptr(str, length, strict);
481# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PTR
482 if (zend_cpu_supports_avx512_vbmi()) {
483 php_base64_encode_ptr = php_base64_encode_avx512_vbmi;
484 php_base64_decode_ex_ptr = php_base64_decode_ex_avx512_vbmi;
487# ifdef BASE64_INTRIN_AVX512_FUNC_PTR
488 if (zend_cpu_supports_avx512()) {
489 php_base64_encode_ptr = php_base64_encode_avx512;
490 php_base64_decode_ex_ptr = php_base64_decode_ex_avx512;
493# ifdef ZEND_INTRIN_AVX2_FUNC_PTR
494 if (zend_cpu_supports_avx2()) {
495 php_base64_encode_ptr = php_base64_encode_avx2;
496 php_base64_decode_ex_ptr = php_base64_decode_ex_avx2;
499#ifdef ZEND_INTRIN_SSSE3_FUNC_PTR
500 if (zend_cpu_supports_ssse3()) {
501 php_base64_encode_ptr = php_base64_encode_ssse3;
502 php_base64_decode_ex_ptr = php_base64_decode_ex_ssse3;
506 php_base64_encode_ptr = php_base64_encode_default;
507 php_base64_decode_ex_ptr = php_base64_decode_ex_default;
514#if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR)
517 const unsigned char *c = str;
521 result = zend_string_safe_alloc(((length + 2) / 3), 4 *
sizeof(
char), 0, 0);
524 const __m512i shuffle_splitting = _mm512_setr_epi32(
525 0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10,
526 0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122,
527 0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e);
528 const __m512i multi_shifts = _mm512_set1_epi64(0x3036242a1016040a);
529 const char *ascii_lookup_tbl =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
530 const __m512i ascii_lookup = _mm512_loadu_si512((__m512i *)ascii_lookup_tbl);
532 while (length > 63) {
534 __m512i str = _mm512_loadu_si512((
const __m512i *)c);
537 str = _mm512_permutexvar_epi8(shuffle_splitting, str);
540 str = _mm512_multishift_epi64_epi8(multi_shifts, str);
543 str = _mm512_permutexvar_epi8(str, ascii_lookup);
546 _mm512_storeu_si512((__m512i *)o, str);
552 o = php_base64_encode_impl(c, length, o,
flags);
559zend_string *php_base64_decode_ex_avx512_vbmi(
const unsigned char *str,
size_t length,
bool strict)
561 const unsigned char *c = str;
566 result = zend_string_alloc(length, 0);
569 const __m512i lookup_0 = _mm512_setr_epi32(
570 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080,
571 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x3e808080, 0x3f808080,
572 0x37363534, 0x3b3a3938, 0x80803d3c, 0x80808080);
573 const __m512i lookup_1 = _mm512_setr_epi32(
574 0x02010080, 0x06050403, 0x0a090807, 0x0e0d0c0b, 0x1211100f, 0x16151413,
575 0x80191817, 0x80808080, 0x1c1b1a80, 0x201f1e1d, 0x24232221, 0x28272625,
576 0x2c2b2a29, 0x302f2e2d, 0x80333231, 0x80808080);
578 const __m512i merge_mask1 = _mm512_set1_epi32(0x01400140);
579 const __m512i merge_mask2 = _mm512_set1_epi32(0x00011000);
581 const __m512i continuous_mask = _mm512_setr_epi32(
582 0x06000102, 0x090a0405, 0x0c0d0e08, 0x16101112, 0x191a1415, 0x1c1d1e18,
583 0x26202122, 0x292a2425, 0x2c2d2e28, 0x36303132, 0x393a3435, 0x3c3d3e38,
584 0x00000000, 0x00000000, 0x00000000, 0x00000000);
586 while (length > 64) {
588 const __m512i input = _mm512_loadu_si512((__m512i *)c);
591 __m512i str = _mm512_permutex2var_epi8(lookup_0, input, lookup_1);
592 const uint64_t mask = _mm512_movepi8_mask(_mm512_or_epi64(str, input));
598 const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, merge_mask1);
599 str = _mm512_madd_epi16(merge_ab_and_bc, merge_mask2);
602 str = _mm512_permutexvar_epi8(continuous_mask, str);
605 _mm512_storeu_si512((__m512i *)o, str);
613 if (!php_base64_decode_impl(c, length, (
unsigned char*)
ZSTR_VAL(
result), &outl, strict)) {
614 zend_string_efree(
result);
624#if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR)
627 const unsigned char *c = str;
631 result = zend_string_safe_alloc(((length + 2) / 3), 4 *
sizeof(
char), 0, 0);
634 while (length > 63) {
637 __m512i str = _mm512_loadu_si512((
const __m512i *)c);
641 str = _mm512_permutexvar_epi32(
642 _mm512_set_epi32(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0), str);
644 str = _mm512_shuffle_epi8(str, _mm512_set4_epi32(0x0a0b090a, 0x07080607, 0x04050304, 0x01020001));
649 const __m512i t0 = _mm512_and_si512(str, _mm512_set1_epi32(0x0fc0fc00));
651 const __m512i
t1 = _mm512_srlv_epi16(t0, _mm512_set1_epi32(0x0006000a));
653 const __m512i
t2 = _mm512_sllv_epi16(str, _mm512_set1_epi32(0x00080004));
655 str = _mm512_ternarylogic_epi32(_mm512_set1_epi32(0x3f003f00),
t2,
t1, 0xca);
658 __m512i
result = _mm512_subs_epu8(str, _mm512_set1_epi8(51));
659 const __mmask64 less = _mm512_cmpgt_epi8_mask(_mm512_set1_epi8(26), str);
660 result = _mm512_mask_mov_epi8(
result, less, _mm512_set1_epi8(13));
661 const __m512i lut = _mm512_set4_epi32(0x000041f0, 0xedfcfcfc, 0xfcfcfcfc, 0xfcfcfc47);
666 _mm512_storeu_si512((__m512i *)o,
result);
672 o = php_base64_encode_impl(c, length, o,
flags);
/*
 * Pack four byte values into one 32-bit word: b0 becomes the least
 * significant byte and b3 the most significant one. Each argument is
 * truncated to uint8_t first, so negative constants such as -65 contribute
 * their low byte (0xbf).
 *
 * Every argument and the expansion as a whole are parenthesized so the macro
 * behaves as a single primary expression whatever operators surround it or
 * appear inside its arguments.
 */
#define build_dword(b0, b1, b2, b3) \
	(((uint32_t)(uint8_t)(b0) << 0) | ((uint32_t)(uint8_t)(b1) << 8) | \
	 ((uint32_t)(uint8_t)(b2) << 16) | ((uint32_t)(uint8_t)(b3) << 24))
/*
 * Build a __m512i constant from 16 byte values. The bytes are packed four at
 * a time with build_dword (b0 is the lowest byte of the first dword, b15 the
 * highest byte of the fourth) and the four resulting dwords are handed to
 * _mm512_setr4_epi32.
 * NOTE(review): per the intrinsic's name this should repeat the same 16-byte
 * pattern in each 128-bit lane - confirm against the Intel Intrinsics Guide.
 */
683#define _mm512_set4lanes_epi8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \
684 _mm512_setr4_epi32(build_dword(b0, b1, b2, b3), build_dword(b4, b5, b6, b7), \
685 build_dword(b8, b9, b10, b11), build_dword(b12, b13, b14, b15))
687zend_string *php_base64_decode_ex_avx512(
const unsigned char *str,
size_t length,
bool strict)
689 const unsigned char *c = str;
694 result = zend_string_alloc(length, 0);
697 while (length > 64) {
699 __m512i str = _mm512_loadu_si512((__m512i *)c);
702 const __m512i higher_nibble = _mm512_and_si512(_mm512_srli_epi32(str, 4), _mm512_set1_epi8(0x0f));
703 const __m512i lower_nibble = _mm512_and_si512(str, _mm512_set1_epi8(0x0f));
704 const __m512i shiftLUT = _mm512_set4lanes_epi8(
705 0, 0, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0);
706 const __m512i maskLUT = _mm512_set4lanes_epi8(
708 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
713 const __m512i bitposLUT = _mm512_set4lanes_epi8(
714 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
715 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
716 const __m512i
M = _mm512_shuffle_epi8(maskLUT, lower_nibble);
717 const __m512i bit = _mm512_shuffle_epi8(bitposLUT, higher_nibble);
718 const uint64_t
match = _mm512_test_epi8_mask(
M, bit);
719 if (
match != (uint64_t)-1) {
722 const __m512i sh = _mm512_shuffle_epi8(shiftLUT, higher_nibble);
723 const __mmask64 eq_2f = _mm512_cmpeq_epi8_mask(str, _mm512_set1_epi8(0x2f));
724 const __m512i shift = _mm512_mask_mov_epi8(sh, eq_2f, _mm512_set1_epi8(16));
725 str = _mm512_add_epi8(str, shift);
728 const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140));
729 str = _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000));
732 const __m512i
t1 = _mm512_shuffle_epi8(str,
733 _mm512_set4lanes_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
734 const __m512i s6 = _mm512_setr_epi32(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0, 0, 0, 0);
735 const __m512i
t2 = _mm512_permutexvar_epi32(s6,
t1);
738 _mm512_storeu_si512((__m512i *)o,
t2);
746 if (!php_base64_decode_impl(c, length, (
unsigned char*)
ZSTR_VAL(
result), &outl, strict)) {
747 zend_string_efree(
result);
757#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
758# if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
759static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
__attribute__((target(
"avx2")));
760static __m256i php_base64_encode_avx2_translate(__m256i in)
__attribute__((target(
"avx2")));
762static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
771 in = _mm256_shuffle_epi8(in, _mm256_set_epi8(
782 t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00));
784 t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
786 t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0));
788 t3 = _mm256_mullo_epi16(
t2, _mm256_set1_epi32(0x01000010));
790 return _mm256_or_si256(
t1,
t3);
801static __m256i php_base64_encode_avx2_translate(__m256i in)
803 __m256i lut, indices, mask;
805 lut = _mm256_setr_epi8(
806 65, 71, -4, -4, -4, -4, -4, -4,
807 -4, -4, -4, -4, -19, -16, 0, 0,
808 65, 71, -4, -4, -4, -4, -4, -4,
809 -4, -4, -4, -4, -19, -16, 0, 0);
811 indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
813 mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));
815 indices = _mm256_sub_epi8(indices, mask);
817 return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));
822#if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
824# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
825static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
__attribute__((target(
"ssse3")));
826static __m128i php_base64_encode_ssse3_translate(__m128i in)
__attribute__((target(
"ssse3")));
829static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
835 in = _mm_shuffle_epi8(in, _mm_set_epi8(
841 t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00));
843 t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
845 t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0));
847 t3 = _mm_mullo_epi16(
t2, _mm_set1_epi32(0x01000010));
854 return _mm_or_si128(
t1,
t3);
857static __m128i php_base64_encode_ssse3_translate(__m128i in)
859 __m128i mask, indices;
860 __m128i lut = _mm_setr_epi8(
877 indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
880 mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));
883 indices = _mm_sub_epi8(indices, mask);
886 return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
889#define PHP_BASE64_ENCODE_SSSE3_LOOP \
890 while (length > 15) { \
891 __m128i s = _mm_loadu_si128((__m128i *)c); \
893 s = php_base64_encode_ssse3_reshuffle(s); \
895 s = php_base64_encode_ssse3_translate(s); \
897 _mm_storeu_si128((__m128i *)o, s); \
905#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
906# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE)
908# elif defined(ZEND_INTRIN_AVX2_RESOLVER)
914 const unsigned char *c = str;
918 result = zend_string_safe_alloc(((length + 2) / 3), 4 *
sizeof(
char), 0, 0);
920# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
922 __m256i
s = _mm256_loadu_si256((__m256i *)c);
924 s = _mm256_permutevar8x32_epi32(
s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
927 s = php_base64_encode_avx2_reshuffle(
s);
929 s = php_base64_encode_avx2_translate(
s);
931 _mm256_storeu_si256((__m256i *)o,
s);
938 s = _mm256_loadu_si256((__m256i *)(c - 4));
942 PHP_BASE64_ENCODE_SSSE3_LOOP;
945 o = php_base64_encode_impl(c, length, o,
flags);
952# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER)
955 const unsigned char *c = str;
959 result = zend_string_safe_alloc(((length + 2) / 3), 4 *
sizeof(
char), 0, 0);
962 PHP_BASE64_ENCODE_SSSE3_LOOP;
964 o = php_base64_encode_impl(c, length, o,
flags);
975#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
976# if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
977static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
__attribute__((target(
"avx2")));
980static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
982 __m256i merge_ab_and_bc,
out;
984 merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
986 out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
988 out = _mm256_shuffle_epi8(
out, _mm256_setr_epi8(
989 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
990 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
992 return _mm256_permutevar8x32_epi32(
out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
996#if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
997# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
998static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
__attribute__((target(
"ssse3")));
1001static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
1003 __m128i merge_ab_and_bc,
out;
1005 merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
1011 out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
1017 return _mm_shuffle_epi8(
out, _mm_setr_epi8(
1029#define PHP_BASE64_DECODE_SSSE3_LOOP \
1030 while (length > 15 + 6 + 2) { \
1031 __m128i lut_lo, lut_hi, lut_roll; \
1032 __m128i hi_nibbles, lo_nibbles, hi, lo; \
1033 __m128i s = _mm_loadu_si128((__m128i *)c); \
1035 lut_lo = _mm_setr_epi8( \
1036 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
1037 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
1038 lut_hi = _mm_setr_epi8( \
1039 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
1040 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
1041 lut_roll = _mm_setr_epi8( \
1042 0, 16, 19, 4, -65, -65, -71, -71, \
1043 0, 0, 0, 0, 0, 0, 0, 0); \
1045 hi_nibbles = _mm_and_si128( \
1046 _mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); \
1047 lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); \
1048 hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); \
1049 lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); \
1053 _mm_movemask_epi8( \
1055 _mm_and_si128(lo, hi), _mm_set1_epi8(0))))) { \
1058 __m128i eq_2f, roll; \
1060 eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); \
1061 roll = _mm_shuffle_epi8( \
1062 lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); \
1064 s = _mm_add_epi8(s, roll); \
1065 s = php_base64_decode_ssse3_reshuffle(s); \
1067 _mm_storeu_si128((__m128i *)o, s); \
1078#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1079# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE)
1081# elif defined(ZEND_INTRIN_AVX2_RESOLVER)
1082zend_string *php_base64_decode_ex_avx2(
const unsigned char *str,
size_t length,
bool strict)
1084zend_string *php_base64_decode_ex_ssse3(
const unsigned char *str,
size_t length,
bool strict)
1087 const unsigned char *c = str;
1092 result = zend_string_alloc(length, 0);
1097# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
1098 while (length > 31 + 11 + 2) {
1099 __m256i lut_lo, lut_hi, lut_roll;
1100 __m256i hi_nibbles, lo_nibbles, hi, lo;
1101 __m256i str = _mm256_loadu_si256((__m256i *)c);
1103 lut_lo = _mm256_setr_epi8(
1104 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
1105 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
1106 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
1107 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
1109 lut_hi = _mm256_setr_epi8(
1110 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
1111 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
1112 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
1113 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
1115 lut_roll = _mm256_setr_epi8(
1116 0, 16, 19, 4, -65, -65, -71, -71,
1117 0, 0, 0, 0, 0, 0, 0, 0,
1118 0, 16, 19, 4, -65, -65, -71, -71,
1119 0, 0, 0, 0, 0, 0, 0, 0);
1121 hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f));
1122 lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f));
1123 hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
1124 lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
1126 if (!_mm256_testz_si256(lo, hi)) {
1129 __m256i eq_2f, roll;
1130 eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f));
1131 roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles));
1134 str = _mm256_add_epi8(str, roll);
1136 str = php_base64_decode_avx2_reshuffle(str);
1138 _mm256_storeu_si256((__m256i *)o, str);
1147 PHP_BASE64_DECODE_SSSE3_LOOP;
1150 if (!php_base64_decode_impl(c, length, (
unsigned char*)
ZSTR_VAL(
result), &outl, strict)) {
1151 zend_string_efree(
result);
1160# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER)
1161zend_string *php_base64_decode_ex_ssse3(
const unsigned char *str,
size_t length,
bool strict)
1163 const unsigned char *c = str;
1168 result = zend_string_alloc(length, 0);
1171 PHP_BASE64_DECODE_SSSE3_LOOP;
1173 if (!php_base64_decode_impl(c, length, (
unsigned char*)
ZSTR_VAL(
result), &outl, strict)) {
1174 zend_string_efree(
result);
1185#if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE)
1186#if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1195 result = zend_string_safe_alloc(((length + 2) / 3), 4 *
sizeof(
char), 0, 0);
1198 p = php_base64_encode_impl(str, length,
p,
flags);
1206#if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE)
1207#if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1208zend_string *php_base64_decode_ex_default(
const unsigned char *str,
size_t length,
bool strict)
1216 result = zend_string_alloc(length, 0);
1218 if (!php_base64_decode_impl(str, length, (
unsigned char*)
ZSTR_VAL(
result), &outl, strict)) {
1219 zend_string_efree(
result);
1241 result = php_base64_encode((
unsigned char*)str, str_len);
PHPAPI zend_string * php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags)
PHPAPI zend_string * php_base64_decode_ex(const unsigned char *str, size_t length, bool strict)
#define PHP_BASE64_NO_PADDING
base64_encode(string $string)
base64_decode(string $string, bool $strict=false)
#define PHP_MINIT_FUNCTION
file_private int match(struct magic_set *, struct magic *, size_t, const struct buffer *, size_t, int, int, int, uint16_t *, uint16_t *, int *, int *, int *, int *, int *)
#define ZEND_PARSE_PARAMETERS_END()
#define Z_PARAM_STRING(dest, dest_len)
#define ZEND_PARSE_PARAMETERS_START(min_num_args, max_num_args)
#define Z_PARAM_BOOL(dest)
#define ZEND_NO_SANITIZE_ADDRESS
struct _zend_string zend_string
#define ZEND_INTRIN_SSSE3_FUNC_DECL(func)
#define ZEND_INTRIN_AVX2_FUNC_DECL(func)
#define ZEND_ATTRIBUTE_UNUSED
#define zend_always_inline
#define ZEND_INTRIN_AVX512_FUNC_DECL(func)
#define ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(func)