php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
convert.c
Go to the documentation of this file.
1/*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Niels Dossche <nielsdos@php.net> |
14 +----------------------------------------------------------------------+
15*/
16
17#include "bcmath.h"
18#include "convert.h"
19#include "private.h"
20#ifdef __SSE2__
21# include <emmintrin.h>
22#endif
23
24char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char *source_end)
25{
26 const size_t bulk_shift = SWAR_REPEAT('0');
27
28#ifdef __SSE2__
29 /* SIMD SSE2 bulk shift + copy */
30 __m128i shift_vector = _mm_set1_epi8('0');
31 while (source + sizeof(__m128i) <= source_end) {
32 __m128i bytes = _mm_loadu_si128((const __m128i *) source);
33 bytes = _mm_xor_si128(bytes, shift_vector);
34 _mm_storeu_si128((__m128i *) dest, bytes);
35
36 source += sizeof(__m128i);
37 dest += sizeof(__m128i);
38 }
39#endif
40
41 /* Handle sizeof(size_t) (i.e. 4/8) bytes at once.
42 * We know that adding/subtracting an individual byte cannot overflow,
43 * so it is possible to add/subtract an entire word of bytes at once
44 * by using SWAR_REPEAT. */
45 while (source + sizeof(size_t) <= source_end) {
46 size_t bytes;
47 memcpy(&bytes, source, sizeof(bytes));
48
49 bytes ^= bulk_shift;
50 memcpy(dest, &bytes, sizeof(bytes));
51
52 source += sizeof(size_t);
53 dest += sizeof(size_t);
54 }
55
56 while (source < source_end) {
57 *dest = *source ^ '0';
58 dest++;
59 source++;
60 }
61
62 return dest;
63}
64
65/* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html.
66 * This function transforms AABBCCDD into 1000 * AA + 100 * BB + 10 * CC + DD,
67 * with the caveat that all components must be in the interval [0, 25] to prevent overflow
68 * due to the multiplication by power of 10 (10 * 25 = 250 is the largest number that fits in a byte).
69 * The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper
70 * due to its divide-and-conquer nature.
71 */
72#if SIZEOF_SIZE_T == 4
73BC_VECTOR bc_parse_chunk_chars(const char *str)
74{
75 BC_VECTOR tmp;
76 memcpy(&tmp, str, sizeof(tmp));
77#if !BC_LITTLE_ENDIAN
78 tmp = BC_BSWAP(tmp);
79#endif
80
81 BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8;
82 BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10;
83
84 tmp = lower_digits + upper_digits;
85
86 lower_digits = (tmp & 0x00ff0000) >> 16;
87 upper_digits = (tmp & 0x000000ff) * 100;
88
89 return lower_digits + upper_digits;
90}
91#elif SIZEOF_SIZE_T == 8
92BC_VECTOR bc_parse_chunk_chars(const char *str)
93{
94 BC_VECTOR tmp;
95 memcpy(&tmp, str, sizeof(tmp));
96#if !BC_LITTLE_ENDIAN
97 tmp = BC_BSWAP(tmp);
98#endif
99
100 BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8;
101 BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10;
102
103 tmp = lower_digits + upper_digits;
104
105 lower_digits = (tmp & 0x00ff000000ff0000) >> 16;
106 upper_digits = (tmp & 0x000000ff000000ff) * 100;
107
108 tmp = lower_digits + upper_digits;
109
110 lower_digits = (tmp & 0x0000ffff00000000) >> 32;
111 upper_digits = (tmp & 0x000000000000ffff) * 10000;
112
113 return lower_digits + upper_digits;
114}
115#endif
116
117#if BC_LITTLE_ENDIAN
118# define BC_ENCODE_LUT(A, B) ((A) | (B) << 4)
119#else
120# define BC_ENCODE_LUT(A, B) ((B) | (A) << 4)
121#endif
122
123#define LUT_ITERATE(_, A) \
124 _(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9)
125
126/* This LUT encodes the decimal representation of numbers 0-100
127 * such that we can avoid taking modulos and divisions which would be slow. */
128static const unsigned char LUT[100] = {
139};
140
141static inline unsigned short bc_expand_lut(unsigned char c)
142{
143 return (c & 0x0f) | (c & 0xf0) << 4;
144}
145
146/* Writes the character representation of the number encoded in value.
147 * E.g. if value = 1234, then the string "1234" will be written to str. */
148void bc_write_bcd_representation(uint32_t value, char *str)
149{
150 uint32_t upper = value / 100; /* e.g. 12 */
151 uint32_t lower = value % 100; /* e.g. 34 */
152
153#if BC_LITTLE_ENDIAN
154 /* Note: little endian, so `lower` comes before `upper`! */
155 uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]);
156#else
157 /* Note: big endian, so `upper` comes before `lower`! */
158 uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]);
159#endif
160 memcpy(str, &digits, sizeof(digits));
161}
void bc_write_bcd_representation(uint32_t value, char *str)
Definition convert.c:148
char * bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char *source_end)
Definition convert.c:24
#define LUT_ITERATE(_, A)
Definition convert.c:123
#define BC_ENCODE_LUT(A, B)
Definition convert.c:118
BC_VECTOR bc_parse_chunk_chars(const char *str)
memcpy(ptr1, ptr2, size)
uint32_t BC_VECTOR
Definition private.h:52
#define BC_BSWAP(u)
Definition private.h:51
#define SWAR_REPEAT(x)
Definition private.h:42
value