42#if !(defined SUPPORT_VALGRIND)
44#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
45 || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
46 || (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64))
49 vector_compare_match1,
50 vector_compare_match1i,
51 vector_compare_match2,
54#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
57#if PCRE2_CODE_UNIT_WIDTH == 8
61#elif PCRE2_CODE_UNIT_WIDTH == 16
65#elif PCRE2_CODE_UNIT_WIDTH == 32
70#error "Unsupported unit width"
76#if PCRE2_CODE_UNIT_WIDTH == 8
78#elif PCRE2_CODE_UNIT_WIDTH == 16
80#elif PCRE2_CODE_UNIT_WIDTH == 32
83#error "Unsupported unit width"
88#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
91#if PCRE2_CODE_UNIT_WIDTH == 8
94#elif PCRE2_CODE_UNIT_WIDTH == 16
98#error "Unknown code width"
105#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
110#if PCRE2_CODE_UNIT_WIDTH == 8
111#define SIMD_COMPARE_TYPE_INDEX 0
113#elif PCRE2_CODE_UNIT_WIDTH == 16
114#define SIMD_COMPARE_TYPE_INDEX 1
116#elif PCRE2_CODE_UNIT_WIDTH == 32
117#define SIMD_COMPARE_TYPE_INDEX 2
120#error "Unsupported unit width"
124static void fast_forward_char_pair_sse2_compare(
struct sljit_compiler *compiler, vector_compare_type compare_type,
131 instruction[0] = 0x66;
132 instruction[1] = 0x0f;
137 instruction[0] = 0xc5;
138 instruction[1] = 0xfd;
143if (compare_type != vector_compare_match2)
147 if (compare_type == vector_compare_match1i)
151 instruction[1] ^= (dst_ind << 3);
154 instruction[2] = 0xeb;
155 instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
166 instruction[1] ^= (dst_ind << 3);
169 instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
170 instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
183 instruction[1] ^= (dst_ind << 3);
194 instruction[2] = 0x6f;
195 instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
202 instruction[1] ^= (dst_ind << 3);
205 instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
206 instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
213 instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
214 instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
221 instruction[1] ^= (dst_ind << 3);
224 instruction[2] = 0xeb;
225 instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
231#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
242#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
247vector_compare_type compare_type = vector_compare_match1;
261 compare_type = vector_compare_match1i;
263 if (!is_powerof2(bit))
266 compare_type = vector_compare_match2;
272 add_jump(compiler, &common->failed_match, partial_quit[0]);
288#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
299for (i = 0; i < 4; i++)
300 fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
303OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
308OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
318 add_jump(compiler, &common->failed_match, partial_quit[1]);
322for (i = 0; i < 4; i++)
323 fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
332instruction[0] = 0x0f;
333instruction[1] = 0xbc;
334instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
337OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
341 JUMPHERE(partial_quit[0]);
342 JUMPHERE(partial_quit[1]);
349#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
350if (common->utf &&
offset > 0)
356 quit = jump_if_utf_char_start(compiler, TMP1);
368#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
380jump_list *not_found =
NULL;
381vector_compare_type compare_type = vector_compare_match1;
393 compare_type = vector_compare_match1i;
395 if (!is_powerof2(bit))
398 compare_type = vector_compare_match2;
428for (i = 0; i < 4; i++)
429 fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
432OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
437OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
450for (i = 0; i < 4; i++)
451 fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
460instruction[0] = 0x0f;
461instruction[1] = 0xbc;
462instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
465OP2(
SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
474#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
476static void fast_forward_char_pair_simd(compiler_common *common,
sljit_s32 offs1,
485vector_compare_type compare1_type = vector_compare_match1;
486vector_compare_type compare2_type = vector_compare_match1;
489sljit_u32 diff = IN_UCHARS(offs1 - offs2);
500#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
507SLJIT_ASSERT(diff <= (
unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
510if (common->match_end_ptr != 0)
517 SELECT(
SLJIT_LESS, STR_END, TMP1, 0, STR_END);
527 bit1 = char1a ^ char1b;
528 if (is_powerof2(bit1))
530 compare1_type = vector_compare_match1i;
536 compare1_type = vector_compare_match2;
553 bit2 = char2a ^ char2b;
554 if (is_powerof2(bit2))
556 compare2_type = vector_compare_match1i;
562 compare2_type = vector_compare_match2;
582#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
606 instruction[0] = 0xc5;
607 instruction[1] = (
sljit_u8)(0xf9 ^ (data2_ind << 3));
608 instruction[2] = 0x73;
609 instruction[3] = 0xc0 | (7 << 3) | data1_ind;
610 instruction[4] = diff & 0xf;
614 instruction[0] = 0xc4;
615 instruction[1] = 0xe3;
621 instruction[2] = (
sljit_u8)(0x7d ^ (data2_ind << 3));
622 instruction[3] = 0x38;
625 instruction[5] = (
sljit_u8)(16 - diff);
634 value = (diff == 16) ? data1_ind : data2_ind;
636 instruction[3] = 0x46;
637 instruction[4] = 0xc0 | (data2_ind << 3) |
value;
638 instruction[5] = 0x08;
645 instruction[0] = 0x66;
646 instruction[1] = 0x0f;
647 instruction[2] = 0x6f;
648 instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
654 instruction[2] = 0x73;
655 instruction[3] = 0xc0 | (7 << 3) | data2_ind;
656 instruction[4] = diff;
665for (i = 0; i < 4; i++)
667 fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
668 fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
675OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
680OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
693for (i = 0; i < 4; i++)
695 fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
696 fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
708instruction[0] = 0x0f;
709instruction[1] = 0xbc;
710instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
713OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
717#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
720 OP1(MOV_UCHAR, TMP1, 0,
SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
722 jump[0] = jump_if_utf_char_start(compiler, TMP1);
725 CMPTO(
SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
727 add_jump(compiler, &common->failed_match, JUMP(
SLJIT_JUMP));
735if (common->match_end_ptr != 0)
741#undef SIMD_COMPARE_TYPE_INDEX
745#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
751 struct {
unsigned char c1, c2, c3, c4; } c;
754#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
757#if PCRE2_CODE_UNIT_WIDTH == 8
758return (*
s & 0xc0) == 0x80;
759#elif PCRE2_CODE_UNIT_WIDTH == 16
760return (*
s & 0xfc00) == 0xdc00;
762#error "Unknown code width"
767#if PCRE2_CODE_UNIT_WIDTH == 8
768# define VECTOR_FACTOR 16
769# define vect_t uint8x16_t
770# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
771# define VCEQQ vceqq_u8
772# define VORRQ vorrq_u8
773# define VST1Q vst1q_u8
774# define VDUPQ vdupq_n_u8
775# define VEXTQ vextq_u8
776# define VANDQ vandq_u8
781#elif PCRE2_CODE_UNIT_WIDTH == 16
782# define VECTOR_FACTOR 8
783# define vect_t uint16x8_t
784# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
785# define VCEQQ vceqq_u16
786# define VORRQ vorrq_u16
787# define VST1Q vst1q_u16
788# define VDUPQ vdupq_n_u16
789# define VEXTQ vextq_u16
790# define VANDQ vandq_u16
796# define VECTOR_FACTOR 4
797# define vect_t uint32x4_t
798# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
799# define VCEQQ vceqq_u32
800# define VORRQ vorrq_u32
801# define VST1Q vst1q_u32
802# define VDUPQ vdupq_n_u32
803# define VEXTQ vextq_u32
804# define VANDQ vandq_u32
813#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
822#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
831#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
838#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
851GET_LOCAL_BASE(
SLJIT_R1, 0, LOCALS0);
860#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
861 if (common->utf &&
offset > 0)
875 if (is_powerof2(mask))
877 ic.c.c1 = char1 | mask;
881#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
882 if (common->utf &&
offset > 0)
899#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
900 if (common->utf &&
offset > 0)
919 add_jump(compiler, &common->failed_match, partial_quit);
926 JUMPHERE(partial_quit);
939static inline vect_t fast_forward_char_pair_compare(compare_type
ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
941if (
ctype == compare_match2)
944 dst = VCEQQ(dst, cmp1);
945 tmp = VCEQQ(tmp, cmp2);
946 dst = VORRQ(dst, tmp);
950if (
ctype == compare_match1i)
951 dst = VORRQ(dst, cmp2);
952dst = VCEQQ(dst, cmp1);
958#if PCRE2_CODE_UNIT_WIDTH == 8
960#elif PCRE2_CODE_UNIT_WIDTH == 16
962#elif PCRE2_CODE_UNIT_WIDTH == 32
965#error "Unsupported unit width"
972vect_t zero = VDUPQ(0);
975#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
979#if PCRE2_CODE_UNIT_WIDTH != 32
980 C(4);
C(5);
C(6);
C(7);
981# if PCRE2_CODE_UNIT_WIDTH != 16
982 C(8);
C(9);
C(10);
C(11);
C(12);
C(13);
C(14);
C(15);
994#define FFCPS_CHAR1A2A
998#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1005#undef FFCPS_CHAR1A2A
1009#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1018#define FFCPS_DEFAULT
1020#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1027#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
1029static void fast_forward_char_pair_simd(compiler_common *common,
sljit_s32 offs1,
1033sljit_u32 diff = IN_UCHARS(offs1 - offs2);
1037SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
1044if (common->match_end_ptr == 0)
1055GET_LOCAL_BASE(
SLJIT_R1, 0, LOCALS0);
1065 if (char1a == char1b && char2a == char2b) {
1066#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1075#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1085#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1100add_jump(compiler, &common->failed_match, partial_quit);
1105JUMPHERE(partial_quit);
1110#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
1112#if PCRE2_CODE_UNIT_WIDTH == 8
1113#define VECTOR_ELEMENT_SIZE 0
1114#elif PCRE2_CODE_UNIT_WIDTH == 16
1115#define VECTOR_ELEMENT_SIZE 1
1116#elif PCRE2_CODE_UNIT_WIDTH == 32
1117#define VECTOR_ELEMENT_SIZE 2
1119#error "Unsupported unit width"
1127instruction[0] = (
sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
1128instruction[1] = (
sljit_u16)(base_reg << 12);
1129instruction[2] = (
sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06));
1134#if PCRE2_CODE_UNIT_WIDTH == 32
1149 instruction[0] = (
sljit_u16)(0xe700 | (dst_vreg << 4));
1151 instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1163 instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
1169instruction[0] = (
sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
1171instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
1177static void fast_forward_char_pair_sse2_compare(
struct sljit_compiler *compiler, vector_compare_type compare_type,
1187 instruction[0] = (
sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
1188 instruction[1] = (
sljit_u16)(cmp1_ind << 12);
1189 instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
1194if (compare_type != vector_compare_match2)
1196 if (step == 0 && compare_type == vector_compare_match1i)
1199 instruction[0] = (
sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
1200 instruction[1] = (
sljit_u16)(cmp2_ind << 12);
1201 instruction[2] = (
sljit_u16)((0xe << 8) | 0x6a);
1211 instruction[0] = (
sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind);
1212 instruction[1] = (
sljit_u16)(cmp2_ind << 12);
1213 instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
1219 instruction[0] = (
sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
1220 instruction[1] = (
sljit_u16)(tmp_ind << 12);
1221 instruction[2] = (
sljit_u16)((0xe << 8) | 0x6a);
1227#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
1234#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1239vector_compare_type compare_type = vector_compare_match1;
1254 bit = char1 ^ char2;
1255 compare_type = vector_compare_match1i;
1257 if (!is_powerof2(bit))
1260 compare_type = vector_compare_match2;
1266 add_jump(compiler, &common->failed_match, partial_quit[0]);
1272#if PCRE2_CODE_UNIT_WIDTH != 32
1275instruction[0] = (
sljit_u16)(0xe700 | (cmp1_ind << 4));
1276instruction[1] = (
sljit_u16)(char1 | bit);
1277instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1283 instruction[0] = (
sljit_u16)(0xe700 | (cmp2_ind << 4));
1284 instruction[1] = (
sljit_u16)(bit != 0 ? bit : char2);
1291for (
int i = 0; i < 2; i++)
1293 replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);
1296 replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
1301if (compare_type == vector_compare_match2)
1304 instruction[0] = (
sljit_u16)(0xe700 | (zero_ind << 4));
1306 instruction[2] = (
sljit_u16)((0x8 << 8) | 0x45);
1310#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1314load_from_mem_vector(compiler,
TRUE, data_ind, str_ptr_reg_ind, 0);
1317if (compare_type != vector_compare_match2)
1319 if (compare_type == vector_compare_match1i)
1320 fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1323 instruction[0] = (
sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1324 instruction[1] = (
sljit_u16)((cmp1_ind << 12) | (1 << 4));
1325 instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
1330 for (i = 0; i < 3; i++)
1331 fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1334 instruction[0] = (
sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1335 instruction[1] = (
sljit_u16)((zero_ind << 12) | (1 << 4));
1336 instruction[2] = (
sljit_u16)((0xe << 8) | 0x81);
1341instruction[0] = (
sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
1343instruction[2] = (
sljit_u16)((0x4 << 8) | 0x21);
1346OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1358 add_jump(compiler, &common->failed_match, partial_quit[1]);
1360load_from_mem_vector(compiler,
TRUE, data_ind, str_ptr_reg_ind, 0);
1362if (compare_type != vector_compare_match2)
1364 if (compare_type == vector_compare_match1i)
1365 fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1368 instruction[0] = (
sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1369 instruction[1] = (
sljit_u16)((cmp1_ind << 12) | (1 << 4));
1370 instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
1375 for (i = 0; i < 3; i++)
1376 fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1379 instruction[0] = (
sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1380 instruction[1] = (
sljit_u16)((zero_ind << 12) | (1 << 4));
1381 instruction[2] = (
sljit_u16)((0xe << 8) | 0x81);
1389instruction[0] = (
sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
1391instruction[2] = (
sljit_u16)((0x4 << 8) | 0x21);
1394OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1400 JUMPHERE(partial_quit[0]);
1401 JUMPHERE(partial_quit[1]);
1408#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1409if (common->utf &&
offset > 0)
1415 quit = jump_if_utf_char_start(compiler, TMP1);
1428#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1
1436jump_list *not_found =
NULL;
1437vector_compare_type compare_type = vector_compare_match1;
1450 bit = char1 ^ char2;
1451 compare_type = vector_compare_match1i;
1453 if (!is_powerof2(bit))
1456 compare_type = vector_compare_match2;
1466#if PCRE2_CODE_UNIT_WIDTH != 32
1469instruction[0] = (
sljit_u16)(0xe700 | (cmp1_ind << 4));
1470instruction[1] = (
sljit_u16)(char1 | bit);
1471instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1477 instruction[0] = (
sljit_u16)(0xe700 | (cmp2_ind << 4));
1478 instruction[1] = (
sljit_u16)(bit != 0 ? bit : char2);
1485for (
int i = 0; i < 2; i++)
1487 replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);
1490 replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
1495if (compare_type == vector_compare_match2)
1498 instruction[0] = (
sljit_u16)(0xe700 | (zero_ind << 4));
1500 instruction[2] = (
sljit_u16)((0x8 << 8) | 0x45);
1504load_from_mem_vector(compiler,
TRUE, data_ind, tmp1_reg_ind, 0);
1507if (compare_type != vector_compare_match2)
1509 if (compare_type == vector_compare_match1i)
1510 fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1513 instruction[0] = (
sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1514 instruction[1] = (
sljit_u16)((cmp1_ind << 12) | (1 << 4));
1515 instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
1520 for (i = 0; i < 3; i++)
1521 fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1524 instruction[0] = (
sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1525 instruction[1] = (
sljit_u16)((zero_ind << 12) | (1 << 4));
1526 instruction[2] = (
sljit_u16)((0xe << 8) | 0x81);
1531instruction[0] = (
sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
1533instruction[2] = (
sljit_u16)((0x4 << 8) | 0x21);
1536OP2(
SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
1548load_from_mem_vector(compiler,
TRUE, data_ind, tmp1_reg_ind, 0);
1550if (compare_type != vector_compare_match2)
1552 if (compare_type == vector_compare_match1i)
1553 fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1556 instruction[0] = (
sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1557 instruction[1] = (
sljit_u16)((cmp1_ind << 12) | (1 << 4));
1558 instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
1563 for (i = 0; i < 3; i++)
1564 fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1567 instruction[0] = (
sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1568 instruction[1] = (
sljit_u16)((zero_ind << 12) | (1 << 4));
1569 instruction[2] = (
sljit_u16)((0xe << 8) | 0x81);
1577instruction[0] = (
sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
1579instruction[2] = (
sljit_u16)((0x4 << 8) | 0x21);
1582OP2(
SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
1590#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
1592static void fast_forward_char_pair_simd(compiler_common *common,
sljit_s32 offs1,
1598#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1603vector_compare_type compare1_type = vector_compare_match1;
1604vector_compare_type compare2_type = vector_compare_match1;
1607sljit_s32 diff = IN_UCHARS(offs2 - offs1);
1626if (char1a != char1b)
1628 bit1 = char1a ^ char1b;
1629 compare1_type = vector_compare_match1i;
1631 if (!is_powerof2(bit1))
1634 compare1_type = vector_compare_match2;
1638if (char2a != char2b)
1640 bit2 = char2a ^ char2b;
1641 compare2_type = vector_compare_match1i;
1643 if (!is_powerof2(bit2))
1646 compare2_type = vector_compare_match2;
1651if (common->match_end_ptr != 0)
1658 SELECT(
SLJIT_LESS, STR_END, TMP1, 0, STR_END);
1665#if PCRE2_CODE_UNIT_WIDTH != 32
1670instruction[0] = (
sljit_u16)(0xe700 | (cmp1a_ind << 4));
1671instruction[1] = (
sljit_u16)(char1a | bit1);
1672instruction[2] = (
sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1675if (char1a != char1b)
1678 instruction[0] = (
sljit_u16)(0xe700 | (cmp1b_ind << 4));
1679 instruction[1] = (
sljit_u16)(bit1 != 0 ? bit1 : char1b);
1685instruction[0] = (
sljit_u16)(0xe700 | (cmp2a_ind << 4));
1686instruction[1] = (
sljit_u16)(char2a | bit2);
1690if (char2a != char2b)
1693 instruction[0] = (
sljit_u16)(0xe700 | (cmp2b_ind << 4));
1694 instruction[1] = (
sljit_u16)(bit2 != 0 ? bit2 : char2b);
1701for (
int i = 0; i < 2; i++)
1703 replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);
1705 if (char1a != char1b)
1706 replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);
1708 replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);
1710 if (char2a != char2b)
1711 replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
1719instruction[0] = (
sljit_u16)(0xe700 | (zero_ind << 4));
1721instruction[2] = (
sljit_u16)((0x8 << 8) | 0x45);
1724#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1729load_from_mem_vector(compiler,
TRUE, data2_ind, tmp1_reg_ind, 0);
1732load_from_mem_vector(compiler,
FALSE, data2_ind, tmp1_reg_ind, 0);
1735load_from_mem_vector(compiler,
TRUE, data1_ind, str_ptr_reg_ind, 0);
1738for (i = 0; i < 3; i++)
1740 fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
1741 fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
1745instruction[0] = (
sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
1746instruction[1] = (
sljit_u16)(data2_ind << 12);
1747instruction[2] = (
sljit_u16)((0xe << 8) | 0x68);
1751instruction[0] = (
sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
1752instruction[1] = (
sljit_u16)((zero_ind << 12) | (1 << 4));
1753instruction[2] = (
sljit_u16)((0xe << 8) | 0x81);
1757instruction[0] = (
sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
1759instruction[2] = (
sljit_u16)((0x4 << 8) | 0x21);
1762OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1774load_from_mem_vector(compiler,
FALSE, data1_ind, str_ptr_reg_ind, 0);
1775load_from_mem_vector(compiler,
FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);
1777for (i = 0; i < 3; i++)
1779 fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
1780 fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
1784instruction[0] = (
sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
1785instruction[1] = (
sljit_u16)(data2_ind << 12);
1786instruction[2] = (
sljit_u16)((0xe << 8) | 0x68);
1790instruction[0] = (
sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
1791instruction[1] = (
sljit_u16)((zero_ind << 12) | (1 << 4));
1792instruction[2] = (
sljit_u16)((0xe << 8) | 0x81);
1799instruction[0] = (
sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
1801instruction[2] = (
sljit_u16)((0x4 << 8) | 0x21);
1804OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1810#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1815 OP1(MOV_UCHAR, TMP1, 0,
SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
1817 quit = jump_if_utf_char_start(compiler, TMP1);
1833if (common->match_end_ptr != 0)
1839#if (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
1843#include <sys/auxv.h>
1844#define LOONGARCH_HWCAP_LSX (1 << 4)
1845#define HAS_LSX_SUPPORT ((getauxval(AT_HWCAP) & LOONGARCH_HWCAP_LSX) != 0)
1847#define HAS_LSX_SUPPORT 0
/* Bit masks selecting the immediate field of an instruction word after the
   immediate has been shifted left by 10 (see the IMM_* macros below). */
#define SI12_IMM_MASK 0x003ffc00
#define UI5_IMM_MASK 0x00007c00
#define UI2_IMM_MASK 0x00000c00

/* Operand field encoders. Each one converts its argument to sljit_ins and
   shifts it to the field position: bit 0 for VD / RD_V, bit 5 for VJ / RJ_V,
   bit 10 for VK. The argument is parenthesized so that an expression such as
   (a | b) is shifted as a whole instead of only its last operand. */
#define VD(vd) ((sljit_ins)(vd) << 0)
#define VJ(vj) ((sljit_ins)(vj) << 5)
#define VK(vk) ((sljit_ins)(vk) << 10)
#define RD_V(rd) ((sljit_ins)(rd) << 0)
#define RJ_V(rj) ((sljit_ins)(rj) << 5)

/* Immediate field encoders: shift the value left by 10 and drop every bit
   outside the corresponding mask, so negative or oversized values are
   truncated to the field width. */
#define IMM_SI12(imm) (((sljit_ins)(imm) << 10) & SI12_IMM_MASK)
#define IMM_UI5(imm) (((sljit_ins)(imm) << 10) & UI5_IMM_MASK)
#define IMM_UI2(imm) (((sljit_ins)(imm) << 10) & UI2_IMM_MASK)

/* Opcode base words. Callers OR these with the operand and immediate fields
   above before handing the word to push_inst().
   NOTE(review): presumably the LoongArch LSX encodings of the instructions
   named by each macro - confirm against the ISA manual. */
#define VLD 0x2c000000
#define VOR_V 0x71268000
#define VAND_V 0x71260000
#define VBSLL_V 0x728e0000
#define VMSKLTZ_B 0x729c4000
#define VPICKVE2GR_WU 0x72f3e000
1874#if PCRE2_CODE_UNIT_WIDTH == 8
1875#define VREPLGR2VR 0x729f0000
1876#define VSEQ 0x70000000
1877#elif PCRE2_CODE_UNIT_WIDTH == 16
1878#define VREPLGR2VR 0x729f0400
1879#define VSEQ 0x70008000
1881#define VREPLGR2VR 0x729f0800
1882#define VSEQ 0x70010000
1885static void fast_forward_char_pair_lsx_compare(
struct sljit_compiler *compiler, vector_compare_type compare_type,
1888if (compare_type != vector_compare_match2)
1890 if (compare_type == vector_compare_match1i)
1893 push_inst(compiler,
VOR_V |
VD(dst_ind) | VJ(cmp2_ind) |
VK(dst_ind));
1897 push_inst(compiler, VSEQ |
VD(dst_ind) | VJ(dst_ind) |
VK(cmp1_ind));
1902push_inst(compiler, VBSLL_V |
VD(tmp_ind) | VJ(dst_ind) | IMM_UI5(0));
1905push_inst(compiler, VSEQ |
VD(dst_ind) | VJ(dst_ind) |
VK(cmp1_ind));
1908push_inst(compiler, VSEQ |
VD(tmp_ind) | VJ(tmp_ind) |
VK(cmp2_ind));
1911push_inst(compiler,
VOR_V |
VD(dst_ind) | VJ(tmp_ind) |
VK(dst_ind));
1915#define JIT_HAS_FAST_FORWARD_CHAR_SIMD HAS_LSX_SUPPORT
1921#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1926vector_compare_type compare_type = vector_compare_match1;
1939 bit = char1 ^ char2;
1940 compare_type = vector_compare_match1i;
1942 if (!is_powerof2(bit))
1945 compare_type = vector_compare_match2;
1951 add_jump(compiler, &common->failed_match, partial_quit[0]);
1958push_inst(compiler,
VREPLGR2VR |
VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
1965 push_inst(compiler,
VREPLGR2VR |
VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
1970#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1975OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1978push_inst(compiler,
VLD |
VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
1979fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1982push_inst(compiler, VMSKLTZ_B |
VD(tmp_ind) | VJ(data_ind));
1985push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
1987OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1992OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2001 add_jump(compiler, &common->failed_match, partial_quit[1]);
2004push_inst(compiler,
VLD |
VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2005fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2008push_inst(compiler, VMSKLTZ_B |
VD(tmp_ind) | VJ(data_ind));
2011push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2018push_inst(compiler,
CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2020OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2024 JUMPHERE(partial_quit[0]);
2025 JUMPHERE(partial_quit[1]);
2032#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2033if (common->utf &&
offset > 0)
2039 quit = jump_if_utf_char_start(compiler, TMP1);
2051#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD HAS_LSX_SUPPORT
2058jump_list *not_found =
NULL;
2059vector_compare_type compare_type = vector_compare_match1;
2070 bit = char1 ^ char2;
2071 compare_type = vector_compare_match1i;
2073 if (!is_powerof2(bit))
2076 compare_type = vector_compare_match2;
2089push_inst(compiler,
VREPLGR2VR |
VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
2095 push_inst(compiler,
VREPLGR2VR |
VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
2100OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2103push_inst(compiler,
VLD |
VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2104fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2107push_inst(compiler, VMSKLTZ_B |
VD(tmp_ind) | VJ(data_ind));
2110push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2112OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2117OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2127push_inst(compiler,
VLD |
VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2128fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2131push_inst(compiler, VMSKLTZ_B |
VD(tmp_ind) | VJ(data_ind));
2134push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2141push_inst(compiler,
CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2143OP2(
SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
2150#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD HAS_LSX_SUPPORT
2152static void fast_forward_char_pair_simd(compiler_common *common,
sljit_s32 offs1,
2156vector_compare_type compare1_type = vector_compare_match1;
2157vector_compare_type compare2_type = vector_compare_match1;
2160sljit_u32 diff = IN_UCHARS(offs1 - offs2);
2173#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2179SLJIT_ASSERT(diff <= (
unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
2182if (common->match_end_ptr != 0)
2189 SELECT(
SLJIT_LESS, STR_END, TMP1, 0, STR_END);
2195if (char1a == char1b)
2199 bit1 = char1a ^ char1b;
2200 if (is_powerof2(bit1))
2202 compare1_type = vector_compare_match1i;
2208 compare1_type = vector_compare_match2;
2216push_inst(compiler,
VREPLGR2VR |
VD(cmp1a_ind) | RJ_V(tmp1_reg_ind));
2218if (char1a != char1b)
2221 push_inst(compiler,
VREPLGR2VR |
VD(cmp1b_ind) | RJ_V(tmp2_reg_ind));
2224if (char2a == char2b)
2228 bit2 = char2a ^ char2b;
2229 if (is_powerof2(bit2))
2231 compare2_type = vector_compare_match1i;
2237 compare2_type = vector_compare_match2;
2245push_inst(compiler,
VREPLGR2VR |
VD(cmp2a_ind) | RJ_V(tmp1_reg_ind));
2247if (char2a != char2b)
2250 push_inst(compiler,
VREPLGR2VR |
VD(cmp2b_ind) | RJ_V(tmp2_reg_ind));
2253#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2260OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2263push_inst(compiler,
VLD |
VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2268push_inst(compiler,
VLD |
VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(
sljit_s8)diff));
2274push_inst(compiler, VBSLL_V |
VD(data2_ind) | VJ(data1_ind) | IMM_UI5(diff));
2278fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
2279fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
2282push_inst(compiler,
VOR_V |
VD(data1_ind) | VJ(data1_ind) |
VK(data2_ind));
2285push_inst(compiler, VMSKLTZ_B |
VD(tmp1_ind) | VJ(data1_ind));
2288push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));
2291OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2296OP2(
SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2305push_inst(compiler,
VLD |
VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2306push_inst(compiler,
VLD |
VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(
sljit_s8)diff));
2308fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
2309fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
2312push_inst(compiler,
VAND_V |
VD(data1_ind) | VJ(data1_ind) |
VK(data2_ind));
2315push_inst(compiler, VMSKLTZ_B |
VD(tmp1_ind) | VJ(data1_ind));
2318push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));
2325push_inst(compiler,
CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2327OP2(
SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2331#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2334 OP1(MOV_UCHAR, TMP1, 0,
SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
2336 jump[0] = jump_if_utf_char_start(compiler, TMP1);
2339 CMPTO(
SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
2341 add_jump(compiler, &common->failed_match, JUMP(
SLJIT_JUMP));
2349if (common->match_end_ptr != 0)
#define PCRE2_JIT_COMPLETE
unsigned short int sljit_u16
#define SLJIT_UNUSED_ARG(arg)
#define SLJIT_SIMD_MEM_ALIGNED_256
#define SLJIT_SIMD_OP2_AND
#define SLJIT_SIMD_ELEM_32
#define SLJIT_SET_OVERFLOW
#define SLJIT_FUNC_ADDR(func_name)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
#define SLJIT_SIMD_ELEM_8
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 srcdst, sljit_sw srcdstw)
#define SLJIT_SIMD_LANE_ZERO
#define SLJIT_FLOAT_REGISTER
#define SLJIT_GREATER_EQUAL
#define SLJIT_GP_REGISTER
#define SLJIT_SIMD_REG_256
#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4)
#define SLJIT_SIMD_MEM_ALIGNED_128
#define SLJIT_SIMD_REG_128
#define SLJIT_SET_GREATER
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 dst, sljit_sw dstw)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_u32 size)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 src, sljit_s32 src_lane_index)