php-internal-docs 8.4.8
Unofficial docs for php/php-src
sljitNativeX86_common.c
1/*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28{
29 return "x86" SLJIT_CPUINFO;
30}
31
32/*
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - ESP
39 5 - EBP
40 6 - ESI
41 7 - EDI
42*/
43
44/*
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - RSP
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - From now on REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
62*/
63
64#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
65#define TMP_FREG (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
66
67#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
68
69static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
70 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3
71};
72
73static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
74 0, 1, 2, 3, 4, 5, 6, 7, 0
75};
76
77#define CHECK_EXTRA_REGS(p, w, do) \
78 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
79 w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
80 p = SLJIT_MEM1(SLJIT_SP); \
81 do; \
82 }
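/* A short worked example of the macro above (annotation, not part of the
   original source): on x86-32 only six GPRs are mappable, so the reg_map
   entries for SLJIT_R3..SLJIT_S3 are 0 and those "extra" registers live in
   stack slots. For p = SLJIT_R4 the macro rewrites the operand roughly as:

       w = (2 * SSIZE_OF(sw)) + (SLJIT_R4 - SLJIT_R3) * SSIZE_OF(sw);  // 12
       p = SLJIT_MEM1(SLJIT_SP);                                       // [esp + 12]

   i.e. the operand becomes a memory access relative to SLJIT_SP. This is
   also why this target reports SLJIT_HAS_VIRTUAL_REGISTERS below. */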
83
84#else /* SLJIT_CONFIG_X86_32 */
85
86#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
87
88/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present"
89 Note: avoid using r12 and r13 for memory addressing;
90 therefore r12 is better used as a higher saved register. */
91#ifndef _WIN64
92/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
93static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
94 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
95};
96/* low-map. reg_map & 0x7. */
97static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
98 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
99};
100#else
101/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
102static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
103 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
104};
105/* low-map. reg_map & 0x7. */
106static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
107 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
108};
109#endif
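/* Reading the tables above: reg_map[i] is the hardware encoding of sljit
   register i, and reg_lmap[i] keeps only the low three bits that fit into a
   ModRM/SIB field (the fourth bit moves into the REX prefix instead). As a
   worked example, on the SysV target TMP_REG1 (the second entry from the
   end) maps to 2, i.e. rdx, which is exactly what the SLJIT_ASSERT in
   sljit_emit_op0() below relies on for the div/mul helpers. */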
110
111/* Args: xmm0-xmm3 */
112static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
113 0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4
114};
115/* low-map. freg_map & 0x7. */
116static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
117 0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4
118};
119
120#define REX_W 0x48
121#define REX_R 0x44
122#define REX_X 0x42
123#define REX_B 0x41
124#define REX 0x40
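/* The REX prefix has the form 0100WRXB: W selects a 64-bit operand size,
   R extends the ModRM reg field, X the SIB index field and B the ModRM rm
   field (or the register embedded in the opcode). A quick example: the
   bytes emitted for "mov r10, rax" are

       0x49 0x89 0xc2        REX_W|REX_B, MOV_rm_r, ModRM(MOD_REG, rax, r10)

   where REX_B is needed because only the low bits of r10 (2) fit in rm. */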
125
126#ifndef _WIN64
127#define HALFWORD_MAX 0x7fffffffl
128#define HALFWORD_MIN -0x80000000l
129#else
130#define HALFWORD_MAX 0x7fffffffll
131#define HALFWORD_MIN -0x80000000ll
132#endif
133
134#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
135#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
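/* These matter because x86-64 instructions (other than the 64-bit form of
   MOV_r_i32) can only carry a sign-extended 32-bit immediate. Operations on
   a NOT_HALFWORD() value therefore first load the immediate into a temporary
   register; see BINARY_IMM() further below. */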
136
137#define CHECK_EXTRA_REGS(p, w, do)
138
139#endif /* SLJIT_CONFIG_X86_32 */
140
141#define U8(v) ((sljit_u8)(v))
142
143/* Size flags for emit_x86_instruction: */
144#define EX86_BIN_INS ((sljit_uw)0x000010)
145#define EX86_SHIFT_INS ((sljit_uw)0x000020)
146#define EX86_BYTE_ARG ((sljit_uw)0x000040)
147#define EX86_HALF_ARG ((sljit_uw)0x000080)
148/* Size flags for both emit_x86_instruction and emit_vex_instruction: */
149#define EX86_REX ((sljit_uw)0x000100)
150#define EX86_NO_REXW ((sljit_uw)0x000200)
151#define EX86_PREF_66 ((sljit_uw)0x000400)
152#define EX86_PREF_F2 ((sljit_uw)0x000800)
153#define EX86_PREF_F3 ((sljit_uw)0x001000)
154#define EX86_SSE2_OP1 ((sljit_uw)0x002000)
155#define EX86_SSE2_OP2 ((sljit_uw)0x004000)
156#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
157#define EX86_VEX_EXT ((sljit_uw)0x008000)
158/* Op flags for emit_vex_instruction: */
159#define VEX_OP_0F38 ((sljit_uw)0x010000)
160#define VEX_OP_0F3A ((sljit_uw)0x020000)
161#define VEX_SSE2_OPV ((sljit_uw)0x040000)
162#define VEX_AUTO_W ((sljit_uw)0x080000)
163#define VEX_W ((sljit_uw)0x100000)
164#define VEX_256 ((sljit_uw)0x200000)
165
166#define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 0 : EX86_PREF_66)
167#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2)
168
169/* --------------------------------------------------------------------- */
170/* Instruction forms */
171/* --------------------------------------------------------------------- */
172
173#define ADD (/* BINARY */ 0 << 3)
174#define ADD_EAX_i32 0x05
175#define ADD_r_rm 0x03
176#define ADD_rm_r 0x01
177#define ADDSD_x_xm 0x58
178#define ADC (/* BINARY */ 2 << 3)
179#define ADC_EAX_i32 0x15
180#define ADC_r_rm 0x13
181#define ADC_rm_r 0x11
182#define AND (/* BINARY */ 4 << 3)
183#define AND_EAX_i32 0x25
184#define AND_r_rm 0x23
185#define AND_rm_r 0x21
186#define ANDPD_x_xm 0x54
187#define BSR_r_rm (/* GROUP_0F */ 0xbd)
188#define BSF_r_rm (/* GROUP_0F */ 0xbc)
189#define BSWAP_r (/* GROUP_0F */ 0xc8)
190#define CALL_i32 0xe8
191#define CALL_rm (/* GROUP_FF */ 2 << 3)
192#define CDQ 0x99
193#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
194#define CMP (/* BINARY */ 7 << 3)
195#define CMP_EAX_i32 0x3d
196#define CMP_r_rm 0x3b
197#define CMP_rm_r 0x39
198#define CMPS_x_xm 0xc2
199#define CMPXCHG_rm_r 0xb1
200#define CMPXCHG_rm8_r 0xb0
201#define CVTPD2PS_x_xm 0x5a
202#define CVTPS2PD_x_xm 0x5a
203#define CVTSI2SD_x_rm 0x2a
204#define CVTTSD2SI_r_xm 0x2c
205#define DIV (/* GROUP_F7 */ 6 << 3)
206#define DIVSD_x_xm 0x5e
207#define EXTRACTPS_x_xm 0x17
208#define FLDS 0xd9
209#define FLDL 0xdd
210#define FSTPS 0xd9
211#define FSTPD 0xdd
212#define INSERTPS_x_xm 0x21
213#define INT3 0xcc
214#define IDIV (/* GROUP_F7 */ 7 << 3)
215#define IMUL (/* GROUP_F7 */ 5 << 3)
216#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
217#define IMUL_r_rm_i8 0x6b
218#define IMUL_r_rm_i32 0x69
219#define JL_i8 0x7c
220#define JE_i8 0x74
221#define JNC_i8 0x73
222#define JNE_i8 0x75
223#define JMP_i8 0xeb
224#define JMP_i32 0xe9
225#define JMP_rm (/* GROUP_FF */ 4 << 3)
226#define LEA_r_m 0x8d
227#define LOOP_i8 0xe2
228#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
229#define MOV_r_rm 0x8b
230#define MOV_r_i32 0xb8
231#define MOV_rm_r 0x89
232#define MOV_rm_i32 0xc7
233#define MOV_rm8_i8 0xc6
234#define MOV_rm8_r8 0x88
235#define MOVAPS_x_xm 0x28
236#define MOVAPS_xm_x 0x29
237#define MOVD_x_rm 0x6e
238#define MOVD_rm_x 0x7e
239#define MOVDDUP_x_xm 0x12
240#define MOVDQA_x_xm 0x6f
241#define MOVDQA_xm_x 0x7f
242#define MOVHLPS_x_x 0x12
243#define MOVHPD_m_x 0x17
244#define MOVHPD_x_m 0x16
245#define MOVLHPS_x_x 0x16
246#define MOVLPD_m_x 0x13
247#define MOVLPD_x_m 0x12
248#define MOVMSKPS_r_x (/* GROUP_0F */ 0x50)
249#define MOVQ_x_xm (/* GROUP_0F */ 0x7e)
250#define MOVSD_x_xm 0x10
251#define MOVSD_xm_x 0x11
252#define MOVSHDUP_x_xm 0x16
253#define MOVSXD_r_rm 0x63
254#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
255#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
256#define MOVUPS_x_xm 0x10
257#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
258#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
259#define MUL (/* GROUP_F7 */ 4 << 3)
260#define MULSD_x_xm 0x59
261#define NEG_rm (/* GROUP_F7 */ 3 << 3)
262#define NOP 0x90
263#define NOT_rm (/* GROUP_F7 */ 2 << 3)
264#define OR (/* BINARY */ 1 << 3)
265#define OR_r_rm 0x0b
266#define OR_EAX_i32 0x0d
267#define OR_rm_r 0x09
268#define OR_rm8_r8 0x08
269#define ORPD_x_xm 0x56
270#define PACKSSWB_x_xm (/* GROUP_0F */ 0x63)
271#define PAND_x_xm 0xdb
272#define PCMPEQD_x_xm 0x76
273#define PINSRB_x_rm_i8 0x20
274#define PINSRW_x_rm_i8 0xc4
275#define PINSRD_x_rm_i8 0x22
276#define PEXTRB_rm_x_i8 0x14
277#define PEXTRW_rm_x_i8 0x15
278#define PEXTRD_rm_x_i8 0x16
279#define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7)
280#define PMOVSXBD_x_xm 0x21
281#define PMOVSXBQ_x_xm 0x22
282#define PMOVSXBW_x_xm 0x20
283#define PMOVSXDQ_x_xm 0x25
284#define PMOVSXWD_x_xm 0x23
285#define PMOVSXWQ_x_xm 0x24
286#define PMOVZXBD_x_xm 0x31
287#define PMOVZXBQ_x_xm 0x32
288#define PMOVZXBW_x_xm 0x30
289#define PMOVZXDQ_x_xm 0x35
290#define PMOVZXWD_x_xm 0x33
291#define PMOVZXWQ_x_xm 0x34
292#define POP_r 0x58
293#define POP_rm 0x8f
294#define POPF 0x9d
295#define POR_x_xm 0xeb
296#define PREFETCH 0x18
297#define PSHUFB_x_xm 0x00
298#define PSHUFD_x_xm 0x70
299#define PSHUFLW_x_xm 0x70
300#define PSRLDQ_x 0x73
301#define PSLLD_x_i8 0x72
302#define PSLLQ_x_i8 0x73
303#define PUSH_i32 0x68
304#define PUSH_r 0x50
305#define PUSH_rm (/* GROUP_FF */ 6 << 3)
306#define PUSHF 0x9c
307#define PXOR_x_xm 0xef
308#define ROL (/* SHIFT */ 0 << 3)
309#define ROR (/* SHIFT */ 1 << 3)
310#define RET_near 0xc3
311#define RET_i16 0xc2
312#define SBB (/* BINARY */ 3 << 3)
313#define SBB_EAX_i32 0x1d
314#define SBB_r_rm 0x1b
315#define SBB_rm_r 0x19
316#define SAR (/* SHIFT */ 7 << 3)
317#define SHL (/* SHIFT */ 4 << 3)
318#define SHLD (/* GROUP_0F */ 0xa5)
319#define SHRD (/* GROUP_0F */ 0xad)
320#define SHR (/* SHIFT */ 5 << 3)
321#define SHUFPS_x_xm 0xc6
322#define SUB (/* BINARY */ 5 << 3)
323#define SUB_EAX_i32 0x2d
324#define SUB_r_rm 0x2b
325#define SUB_rm_r 0x29
326#define SUBSD_x_xm 0x5c
327#define TEST_EAX_i32 0xa9
328#define TEST_rm_r 0x85
329#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
330#define UCOMISD_x_xm 0x2e
331#define UNPCKLPD_x_xm 0x14
332#define UNPCKLPS_x_xm 0x14
333#define VBROADCASTSD_x_xm 0x19
334#define VBROADCASTSS_x_xm 0x18
335#define VEXTRACTF128_x_ym 0x19
336#define VEXTRACTI128_x_ym 0x39
337#define VINSERTF128_y_y_xm 0x18
338#define VINSERTI128_y_y_xm 0x38
339#define VPBROADCASTB_x_xm 0x78
340#define VPBROADCASTD_x_xm 0x58
341#define VPBROADCASTQ_x_xm 0x59
342#define VPBROADCASTW_x_xm 0x79
343#define VPERMPD_y_ym 0x01
344#define VPERMQ_y_ym 0x00
345#define XCHG_EAX_r 0x90
346#define XCHG_r_rm 0x87
347#define XOR (/* BINARY */ 6 << 3)
348#define XOR_EAX_i32 0x35
349#define XOR_r_rm 0x33
350#define XOR_rm_r 0x31
351#define XORPD_x_xm 0x57
352
353#define GROUP_0F 0x0f
354#define GROUP_66 0x66
355#define GROUP_F3 0xf3
356#define GROUP_F7 0xf7
357#define GROUP_FF 0xff
358#define GROUP_BINARY_81 0x81
359#define GROUP_BINARY_83 0x83
360#define GROUP_SHIFT_1 0xd1
361#define GROUP_SHIFT_N 0xc1
362#define GROUP_SHIFT_CL 0xd3
363#define GROUP_LOCK 0xf0
364
365#define MOD_REG 0xc0
366#define MOD_DISP8 0x40
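/* ModRM layout reminder: mod(2) | reg(3) | rm(3). MOD_REG selects a
   register-direct operand, MOD_DISP8 a memory operand with an 8-bit
   displacement. E.g. "add ecx, eax" encodes as ADD_r_rm plus a
   register-direct ModRM byte:

       0x03 0xc8             ADD_r_rm, MOD_REG | (ecx << 3) | eax  */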
367
368#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
369
370#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
371#define POP_REG(r) (*inst++ = U8(POP_r + (r)))
372#define RET() (*inst++ = RET_near)
373#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
374
375#define SLJIT_INST_LABEL 255
376#define SLJIT_INST_JUMP 254
377#define SLJIT_INST_MOV_ADDR 253
378#define SLJIT_INST_CONST 252
379
380/* Multithreading does not affect these static variables, since they store
381 built-in CPU features. Therefore they can be overwritten by different threads
382 if they detect the CPU features at the same time. */
383#define CPU_FEATURE_DETECTED 0x001
384#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
385#define CPU_FEATURE_SSE2 0x002
386#endif
387#define CPU_FEATURE_SSE41 0x004
388#define CPU_FEATURE_LZCNT 0x008
389#define CPU_FEATURE_TZCNT 0x010
390#define CPU_FEATURE_CMOV 0x020
391#define CPU_FEATURE_AVX 0x040
392#define CPU_FEATURE_AVX2 0x080
393#define CPU_FEATURE_OSXSAVE 0x100
394
395static sljit_u32 cpu_feature_list = 0;
396
397#ifdef _WIN32_WCE
398#include <cmnintrin.h>
399#elif defined(_MSC_VER) && _MSC_VER >= 1400
400#include <intrin.h>
401#endif
402
403/******************************************************/
404/* Unaligned-store functions */
405/******************************************************/
406
407static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
408{
409 SLJIT_MEMCPY(addr, &value, sizeof(value));
410}
411
412static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
413{
414 SLJIT_MEMCPY(addr, &value, sizeof(value));
415}
416
417static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
418{
419 SLJIT_MEMCPY(addr, &value, sizeof(value));
420}
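/* The SLJIT_MEMCPY() form is deliberate: jump patching writes immediates at
   arbitrary byte offsets, and a plain pointer cast would be an unaligned,
   type-punned store (undefined behavior in C). Compilers inline these copies
   into single mov instructions on x86, so there is no runtime cost. */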
421
422/******************************************************/
423/* Utility functions */
424/******************************************************/
425
426static void execute_cpu_id(sljit_u32 info[4])
427{
428#if defined(_MSC_VER) && _MSC_VER >= 1400
429
430 __cpuidex((int*)info, (int)info[0], (int)info[2]);
431
432#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)
433
434 /* AT&T syntax. */
435 __asm__ (
436#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
437 "movl %0, %%esi\n"
438 "movl (%%esi), %%eax\n"
439 "movl 8(%%esi), %%ecx\n"
440 "pushl %%ebx\n"
441 "cpuid\n"
442 "movl %%eax, (%%esi)\n"
443 "movl %%ebx, 4(%%esi)\n"
444 "popl %%ebx\n"
445 "movl %%ecx, 8(%%esi)\n"
446 "movl %%edx, 12(%%esi)\n"
447#else /* !SLJIT_CONFIG_X86_32 */
448 "movq %0, %%rsi\n"
449 "movl (%%rsi), %%eax\n"
450 "movl 8(%%rsi), %%ecx\n"
451 "cpuid\n"
452 "movl %%eax, (%%rsi)\n"
453 "movl %%ebx, 4(%%rsi)\n"
454 "movl %%ecx, 8(%%rsi)\n"
455 "movl %%edx, 12(%%rsi)\n"
456#endif /* SLJIT_CONFIG_X86_32 */
457 :
458 : "r" (info)
459#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
460 : "memory", "eax", "ecx", "edx", "esi"
461#else /* !SLJIT_CONFIG_X86_32 */
462 : "memory", "rax", "rbx", "rcx", "rdx", "rsi"
463#endif /* SLJIT_CONFIG_X86_32 */
464 );
465
466#else /* _MSC_VER < 1400 */
467
468 /* Intel syntax. */
469 __asm {
470#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
471 mov esi, info
472 mov eax, [esi]
473 mov ecx, [esi + 8]
474 cpuid
475 mov [esi], eax
476 mov [esi + 4], ebx
477 mov [esi + 8], ecx
478 mov [esi + 12], edx
479#else /* !SLJIT_CONFIG_X86_32 */
480 mov rsi, info
481 mov eax, [rsi]
482 mov ecx, [rsi + 8]
483 cpuid
484 mov [rsi], eax
485 mov [rsi + 4], ebx
486 mov [rsi + 8], ecx
487 mov [rsi + 12], edx
488#endif /* SLJIT_CONFIG_X86_32 */
489 }
490
491#endif /* _MSC_VER && _MSC_VER >= 1400 */
492}
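/* Calling convention of the helper above: info[0] holds the CPUID leaf (EAX)
   and info[2] the subleaf (ECX) on entry; on return info[0..3] contain EAX,
   EBX, ECX and EDX. A minimal sketch of how get_cpu_features() below uses it:

       sljit_u32 info[4] = { 7, 0, 0, 0 };   // leaf 7, subleaf 0
       execute_cpu_id(info);
       // info[1] (EBX) bit 3: BMI1/TZCNT, bit 5: AVX2
*/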
493
494static sljit_u32 execute_get_xcr0_low(void)
495{
496 sljit_u32 xcr0;
497
498#if defined(_MSC_VER) && _MSC_VER >= 1400
499
500 xcr0 = (sljit_u32)_xgetbv(0);
501
502#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)
503
504 /* AT&T syntax. */
505 __asm__ (
506 "xorl %%ecx, %%ecx\n"
507 "xgetbv\n"
508 : "=a" (xcr0)
509 :
510#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
511 : "ecx", "edx"
512#else /* !SLJIT_CONFIG_X86_32 */
513 : "rcx", "rdx"
514#endif /* SLJIT_CONFIG_X86_32 */
515 );
516
517#else /* _MSC_VER < 1400 */
518
519 /* Intel syntax. */
520 __asm {
521 mov ecx, 0
522 xgetbv
523 mov xcr0, eax
524 }
525
526#endif /* _MSC_VER && _MSC_VER >= 1400 */
527 return xcr0;
528}
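/* XCR0 reports which register states the OS saves on context switch: bit 1
   covers the SSE (XMM) state and bit 2 the AVX (YMM) state. The 0x4 check in
   get_cpu_features() below therefore drops AVX/AVX2 when the OS does not
   preserve YMM registers, even if the CPU itself supports them. */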
529
530static void get_cpu_features(void)
531{
532 sljit_u32 feature_list = CPU_FEATURE_DETECTED;
533 sljit_u32 info[4] = {0};
534 sljit_u32 max_id;
535
536 execute_cpu_id(info);
537 max_id = info[0];
538
539 if (max_id >= 7) {
540 info[0] = 7;
541 info[2] = 0;
542 execute_cpu_id(info);
543
544 if (info[1] & 0x8)
545 feature_list |= CPU_FEATURE_TZCNT;
546 if (info[1] & 0x20)
547 feature_list |= CPU_FEATURE_AVX2;
548 }
549
550 if (max_id >= 1) {
551 info[0] = 1;
552 execute_cpu_id(info);
553
554 if (info[2] & 0x80000)
555 feature_list |= CPU_FEATURE_SSE41;
556 if (info[2] & 0x8000000)
557 feature_list |= CPU_FEATURE_OSXSAVE;
558 if (info[2] & 0x10000000)
559 feature_list |= CPU_FEATURE_AVX;
560#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
561 if (info[3] & 0x4000000)
562 feature_list |= CPU_FEATURE_SSE2;
563#endif
564 if (info[3] & 0x8000)
565 feature_list |= CPU_FEATURE_CMOV;
566 }
567
568 info[0] = 0x80000001;
569 execute_cpu_id(info);
570
571 if (info[2] & 0x20)
572 feature_list |= CPU_FEATURE_LZCNT;
573
574 if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0)
575 feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2);
576
577 cpu_feature_list = feature_list;
578}
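/* For reference, the magic constants above are the standard CPUID feature
   bits: leaf 1 ECX bit 19 (0x80000) = SSE4.1, bit 27 = OSXSAVE, bit 28 = AVX;
   leaf 1 EDX bit 15 (0x8000) = CMOV, bit 26 (0x4000000) = SSE2;
   leaf 7 EBX bit 3 = BMI1 (TZCNT), bit 5 = AVX2;
   leaf 0x80000001 ECX bit 5 (0x20) = LZCNT. */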
579
580static sljit_u8 get_jump_code(sljit_uw type)
581{
582 switch (type) {
583 case SLJIT_EQUAL:
584 case SLJIT_ATOMIC_STORED:
585 case SLJIT_F_EQUAL:
586 case SLJIT_UNORDERED_OR_EQUAL:
587 return 0x84 /* je */;
588
589 case SLJIT_NOT_EQUAL:
590 case SLJIT_ATOMIC_NOT_STORED:
591 case SLJIT_F_NOT_EQUAL:
592 case SLJIT_ORDERED_NOT_EQUAL:
593 return 0x85 /* jne */;
594
595 case SLJIT_LESS:
596 case SLJIT_CARRY:
597 case SLJIT_F_LESS:
598 case SLJIT_UNORDERED_OR_LESS:
599 case SLJIT_UNORDERED_OR_GREATER:
600 return 0x82 /* jc */;
601
602 case SLJIT_GREATER_EQUAL:
603 case SLJIT_NOT_CARRY:
604 case SLJIT_F_GREATER_EQUAL:
605 case SLJIT_ORDERED_GREATER_EQUAL:
606 case SLJIT_ORDERED_LESS_EQUAL:
607 return 0x83 /* jae */;
608
609 case SLJIT_GREATER:
610 case SLJIT_F_GREATER:
611 case SLJIT_ORDERED_GREATER:
612 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
613 return 0x87 /* jnbe */;
614
615 case SLJIT_LESS_EQUAL:
616 case SLJIT_F_LESS_EQUAL:
617 case SLJIT_ORDERED_LESS:
618 case SLJIT_UNORDERED_OR_LESS_EQUAL:
619 return 0x86 /* jbe */;
620
621 case SLJIT_SIG_LESS:
622 return 0x8c /* jl */;
623
624 case SLJIT_SIG_GREATER_EQUAL:
625 return 0x8d /* jnl */;
626
627 case SLJIT_SIG_GREATER:
628 return 0x8f /* jnle */;
629
630 case SLJIT_SIG_LESS_EQUAL:
631 return 0x8e /* jle */;
632
633 case SLJIT_OVERFLOW:
634 return 0x80 /* jo */;
635
636 case SLJIT_NOT_OVERFLOW:
637 return 0x81 /* jno */;
638
639 case SLJIT_UNORDERED:
640 case SLJIT_ORDERED_EQUAL: /* NaN. */
641 return 0x8a /* jp */;
642
643 case SLJIT_ORDERED:
644 case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */
645 return 0x8b /* jpo */;
646 }
647 return 0;
648}
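/* The values returned above are the second byte of the two-byte near form
   (0x0f 0x8?, rel32). The one-byte short form is always 0x10 less, which is
   why callers such as detect_near_jump_type() compute
   get_jump_code(type) - 0x10; e.g. "je" is 0x0f 0x84 rel32 or 0x74 rel8. */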
649
650#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
651static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
652#else /* !SLJIT_CONFIG_X86_32 */
653static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr);
654static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset);
655#endif /* SLJIT_CONFIG_X86_32 */
656
657static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
658{
659 sljit_uw type = jump->flags >> TYPE_SHIFT;
660 sljit_s32 short_jump;
661 sljit_uw label_addr;
662
663 if (jump->flags & JUMP_ADDR)
664 label_addr = jump->u.target - (sljit_uw)executable_offset;
665 else
666 label_addr = (sljit_uw)(code + jump->u.label->size);
667
668#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
669 if ((sljit_sw)(label_addr - (sljit_uw)(code_ptr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 5)) < HALFWORD_MIN)
670 return detect_far_jump_type(jump, code_ptr);
671#endif /* SLJIT_CONFIG_X86_64 */
672
673 short_jump = (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) >= -0x80 && (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) <= 0x7f;
674
675 if (type == SLJIT_JUMP) {
676 if (short_jump)
677 *code_ptr++ = JMP_i8;
678 else
679 *code_ptr++ = JMP_i32;
680 } else if (type > SLJIT_JUMP) {
681 short_jump = 0;
682 *code_ptr++ = CALL_i32;
683 } else if (short_jump) {
684 *code_ptr++ = U8(get_jump_code(type) - 0x10);
685 } else {
686 *code_ptr++ = GROUP_0F;
687 *code_ptr++ = get_jump_code(type);
688 }
689
690 jump->addr = (sljit_uw)code_ptr;
691
692 if (short_jump) {
693 jump->flags |= PATCH_MB;
694 code_ptr += sizeof(sljit_s8);
695 } else {
696 jump->flags |= PATCH_MW;
697 code_ptr += sizeof(sljit_s32);
698 }
699
700 return code_ptr;
701}
702
703static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
704{
705 sljit_uw flags = jump->flags;
706 sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
707 sljit_uw jump_addr = jump->addr;
708 SLJIT_UNUSED_ARG(executable_offset);
709
710 if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) {
711#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
712 sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
713#else /* SLJIT_CONFIG_X86_32 */
714 if (flags & PATCH_MD) {
715 SLJIT_ASSERT(!(flags & JUMP_ADDR));
716 sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
717 return;
718 }
719
720 if (flags & PATCH_MW) {
721 addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);
722 SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
723 } else {
724 SLJIT_ASSERT(addr <= 0xffffffffl);
725 }
726 sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr);
727#endif /* !SLJIT_CONFIG_X86_32 */
728 return;
729 }
730
731#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
732 if (SLJIT_UNLIKELY(flags & PATCH_MD)) {
733 SLJIT_ASSERT(!(flags & JUMP_ADDR));
734 sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
735 return;
736 }
737#endif /* SLJIT_CONFIG_X86_64 */
738
739 addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);
740
741 if (flags & PATCH_MB) {
742 addr -= sizeof(sljit_s8);
743 SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80);
744 *(sljit_u8*)jump_addr = U8(addr);
745 return;
746 } else if (flags & PATCH_MW) {
747 addr -= sizeof(sljit_s32);
748#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
749 sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
750#else /* !SLJIT_CONFIG_X86_32 */
751 SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
752 sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr);
753#endif /* SLJIT_CONFIG_X86_32 */
754 }
755}
756
757static void reduce_code_size(struct sljit_compiler *compiler)
758{
759 struct sljit_label *label;
760 struct sljit_jump *jump;
761 sljit_uw next_label_size;
762 sljit_uw next_jump_addr;
763 sljit_uw next_min_addr;
764 sljit_uw size_reduce = 0;
765 sljit_sw diff;
766 sljit_uw type;
767#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
768 sljit_uw size_reduce_max;
769#endif /* SLJIT_DEBUG */
770
771 label = compiler->labels;
772 jump = compiler->jumps;
773
774 next_label_size = SLJIT_GET_NEXT_SIZE(label);
775 next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
776
777 while (1) {
778 next_min_addr = next_label_size;
779 if (next_jump_addr < next_min_addr)
780 next_min_addr = next_jump_addr;
781
782 if (next_min_addr == SLJIT_MAX_ADDRESS)
783 break;
784
785 if (next_min_addr == next_label_size) {
786 label->size -= size_reduce;
787
788 label = label->next;
789 next_label_size = SLJIT_GET_NEXT_SIZE(label);
790 }
791
792 if (next_min_addr != next_jump_addr)
793 continue;
794
795 if (!(jump->flags & JUMP_MOV_ADDR)) {
796#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
797 size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE);
798#endif /* SLJIT_DEBUG */
799
800 if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
801 if (jump->flags & JUMP_ADDR) {
802#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
803 if (jump->u.target <= 0xffffffffl)
804 size_reduce += sizeof(sljit_s32);
805#endif /* SLJIT_CONFIG_X86_64 */
806 } else {
807 /* Unit size: instruction. */
808 diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce);
809 type = jump->flags >> TYPE_SHIFT;
810
811#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
812 if (type == SLJIT_JUMP) {
813 if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
814 size_reduce += JUMP_MAX_SIZE - 2;
815 else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
816 size_reduce += JUMP_MAX_SIZE - 5;
817 } else if (type < SLJIT_JUMP) {
818 if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
819 size_reduce += CJUMP_MAX_SIZE - 2;
820 else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6)
821 size_reduce += CJUMP_MAX_SIZE - 6;
822 } else {
823 if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
824 size_reduce += JUMP_MAX_SIZE - 5;
825 }
826#else /* !SLJIT_CONFIG_X86_64 */
827 if (type == SLJIT_JUMP) {
828 if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
829 size_reduce += JUMP_MAX_SIZE - 2;
830 } else if (type < SLJIT_JUMP) {
831 if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
832 size_reduce += CJUMP_MAX_SIZE - 2;
833 }
834#endif /* SLJIT_CONFIG_X86_64 */
835 }
836 }
837
838#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
839 jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
840#endif /* SLJIT_DEBUG */
841#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
842 } else {
843#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
844 size_reduce_max = size_reduce + 10;
845#endif /* SLJIT_DEBUG */
846
847 if (!(jump->flags & JUMP_ADDR)) {
848 diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce - 3);
849
850 if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN)
851 size_reduce += 3;
852 } else if (jump->u.target <= 0xffffffffl)
853 size_reduce += (jump->flags & MOV_ADDR_HI) ? 4 : 5;
854
855#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
856 jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
857#endif /* SLJIT_DEBUG */
858#endif /* SLJIT_CONFIG_X86_64 */
859 }
860
861 jump = jump->next;
862 next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
863 }
864
865 compiler->size -= size_reduce;
866}
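/* In other words: the first pass reserved worst-case room for every jump
   (JUMP_MAX_SIZE / CJUMP_MAX_SIZE / 10 bytes for a 64-bit mov_addr). This
   pass walks labels and jumps in address order, predicts which ones fit a
   rel8 or rel32 form, and accumulates the savings in size_reduce so that
   label sizes and compiler->size reflect the final layout before the code
   is actually emitted. */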
867
868SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
869{
870 struct sljit_memory_fragment *buf;
871 sljit_u8 *code;
872 sljit_u8 *code_ptr;
873 sljit_u8 *buf_ptr;
874 sljit_u8 *buf_end;
875 sljit_u8 len;
876 sljit_sw executable_offset;
877#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
878 sljit_uw addr;
879#endif /* SLJIT_DEBUG */
880
881 struct sljit_label *label;
882 struct sljit_jump *jump;
883 struct sljit_const *const_;
884
885 CHECK_ERROR_PTR();
886 CHECK_PTR(check_sljit_generate_code(compiler));
887
888 reduce_code_size(compiler);
889
890 /* Second code generation pass. */
891 code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset);
892 PTR_FAIL_WITH_EXEC_IF(code);
893
894 reverse_buf(compiler);
895 buf = compiler->buf;
896
897 code_ptr = code;
898 label = compiler->labels;
899 jump = compiler->jumps;
900 const_ = compiler->consts;
901
902 do {
903 buf_ptr = buf->memory;
904 buf_end = buf_ptr + buf->used_size;
905 do {
906 len = *buf_ptr++;
907 SLJIT_ASSERT(len > 0);
908 if (len < SLJIT_INST_CONST) {
909 /* The code is already generated. */
910 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
911 code_ptr += len;
912 buf_ptr += len;
913 } else {
914 switch (len) {
915 case SLJIT_INST_LABEL:
916 label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
917 label->size = (sljit_uw)(code_ptr - code);
918 label = label->next;
919 break;
920 case SLJIT_INST_JUMP:
921#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
922 addr = (sljit_uw)code_ptr;
923#endif /* SLJIT_DEBUG */
924 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
925 code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset);
926 else {
927#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
928 code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset);
929#else /* !SLJIT_CONFIG_X86_32 */
930 code_ptr = detect_far_jump_type(jump, code_ptr);
931#endif /* SLJIT_CONFIG_X86_32 */
932 }
933
934 SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f));
935 jump = jump->next;
936 break;
937 case SLJIT_INST_MOV_ADDR:
938#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
939 code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset);
940#endif /* SLJIT_CONFIG_X86_64 */
941 jump->addr = (sljit_uw)code_ptr;
942 jump = jump->next;
943 break;
944 default:
945 SLJIT_ASSERT(len == SLJIT_INST_CONST);
946 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
947 const_ = const_->next;
948 break;
949 }
950 }
951 } while (buf_ptr < buf_end);
952
953 SLJIT_ASSERT(buf_ptr == buf_end);
954 buf = buf->next;
955 } while (buf);
956
957 SLJIT_ASSERT(!label);
958 SLJIT_ASSERT(!jump);
959 SLJIT_ASSERT(!const_);
960 SLJIT_ASSERT(code_ptr <= code + compiler->size);
961
962 jump = compiler->jumps;
963 while (jump) {
964 generate_jump_or_mov_addr(jump, executable_offset);
965 jump = jump->next;
966 }
967
968 compiler->error = SLJIT_ERR_COMPILED;
969 compiler->executable_offset = executable_offset;
970 compiler->executable_size = (sljit_uw)(code_ptr - code);
971
972 code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
973
974 SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
975 return (void*)code;
976}
977
978SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
979{
980 switch (feature_type) {
981 case SLJIT_HAS_FPU:
982#ifdef SLJIT_IS_FPU_AVAILABLE
983 return (SLJIT_IS_FPU_AVAILABLE) != 0;
984#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
985 if (cpu_feature_list == 0)
986 get_cpu_features();
987 return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
988#else /* SLJIT_DETECT_SSE2 */
989 return 1;
990#endif /* SLJIT_DETECT_SSE2 */
991
992#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
993 case SLJIT_HAS_VIRTUAL_REGISTERS:
994 return 1;
995#endif /* SLJIT_CONFIG_X86_32 */
996
997 case SLJIT_HAS_CLZ:
998 if (cpu_feature_list == 0)
999 get_cpu_features();
1000
1001 return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;
1002
1003 case SLJIT_HAS_CTZ:
1004 if (cpu_feature_list == 0)
1005 get_cpu_features();
1006
1007 return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;
1008
1009 case SLJIT_HAS_CMOV:
1010 if (cpu_feature_list == 0)
1011 get_cpu_features();
1012 return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;
1013
1014 case SLJIT_HAS_REV:
1015 case SLJIT_HAS_ROT:
1016 case SLJIT_HAS_PREFETCH:
1017 case SLJIT_HAS_COPY_F32:
1018 case SLJIT_HAS_COPY_F64:
1019 case SLJIT_HAS_ATOMIC:
1020 return 1;
1021
1022#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
1023 case SLJIT_HAS_AVX:
1024 if (cpu_feature_list == 0)
1025 get_cpu_features();
1026 return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
1027 case SLJIT_HAS_AVX2:
1028 if (cpu_feature_list == 0)
1029 get_cpu_features();
1030 return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
1031 case SLJIT_HAS_SIMD:
1032 if (cpu_feature_list == 0)
1033 get_cpu_features();
1034 return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
1035#endif /* SLJIT_IS_FPU_AVAILABLE */
1036 default:
1037 return 0;
1038 }
1039}
1040
1041SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1042{
1043 switch (type) {
1044 case SLJIT_ORDERED_EQUAL:
1045 case SLJIT_UNORDERED_OR_NOT_EQUAL:
1046 return 2;
1047 }
1048
1049 return 0;
1050}
1051
1052/* --------------------------------------------------------------------- */
1053/* Operators */
1054/* --------------------------------------------------------------------- */
1055
1056#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
1057
1058#define BINARY_IMM32(op_imm, immw, arg, argw) \
1059 do { \
1060 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1061 FAIL_IF(!inst); \
1062 *(inst + 1) |= (op_imm); \
1063 } while (0)
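/* Sketch of what BINARY_IMM32() produces: emit_x86_instruction() returns a
   pointer to the opcode (GROUP_BINARY_81, or GROUP_BINARY_83 when the
   immediate fits in a byte), and the "|= op_imm" fills in the /digit of the
   following ModRM byte. For example, ADD with an imm32 against a register
   becomes 0x81 /0 id, i.e. 0x81 0xc0+r imm32. */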
1064
1065#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1066
1067#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1068 do { \
1069 if (IS_HALFWORD(immw) || compiler->mode32) { \
1070 BINARY_IMM32(op_imm, immw, arg, argw); \
1071 } \
1072 else { \
1073 FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \
1074 inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
1075 FAIL_IF(!inst); \
1076 *inst = (op_mr); \
1077 } \
1078 } while (0)
1079
1080#define BINARY_EAX_IMM(op_eax_imm, immw) \
1081 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1082
1083#else /* !SLJIT_CONFIG_X86_64 */
1084
1085#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1086 BINARY_IMM32(op_imm, immw, arg, argw)
1087
1088#define BINARY_EAX_IMM(op_eax_imm, immw) \
1089 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1090
1091#endif /* SLJIT_CONFIG_X86_64 */
1092
1093static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)
1094{
1095 sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1096 FAIL_IF(!inst);
1097 INC_SIZE(1);
1098 *inst = byte;
1099 return SLJIT_SUCCESS;
1100}
1101
1102static sljit_s32 emit_mov(struct sljit_compiler *compiler,
1103 sljit_s32 dst, sljit_sw dstw,
1104 sljit_s32 src, sljit_sw srcw);
1105
1106#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
1107 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1108
1109static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
1110 sljit_uw op,
1111 sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1112
1113static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
1114 sljit_uw op,
1115 sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1116
1117static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
1118 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
1119
1120static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
1121 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1122
1123static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1124 sljit_s32 src1, sljit_sw src1w,
1125 sljit_s32 src2, sljit_sw src2w);
1126
1127static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
1128 sljit_s32 dst_reg,
1129 sljit_s32 src, sljit_sw srcw);
1130
1131static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
1132{
1133#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
1134 /* Emit endbr32/endbr64 when CET is enabled. */
1135 sljit_u8 *inst;
1136 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1137 FAIL_IF(!inst);
1138 INC_SIZE(4);
1139 inst[0] = GROUP_F3;
1140 inst[1] = GROUP_0F;
1141 inst[2] = 0x1e;
1142#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1143 inst[3] = 0xfb;
1144#else /* !SLJIT_CONFIG_X86_32 */
1145 inst[3] = 0xfa;
1146#endif /* SLJIT_CONFIG_X86_32 */
1147#else /* !SLJIT_CONFIG_X86_CET */
1148 SLJIT_UNUSED_ARG(compiler);
1149#endif /* SLJIT_CONFIG_X86_CET */
1150 return SLJIT_SUCCESS;
1151}
1152
1153#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1154
1155static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
1156{
1157 sljit_u8 *inst;
1158 sljit_s32 size;
1159
1160#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1161 size = 5;
1162#else
1163 size = 4;
1164#endif
1165
1166 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1167 FAIL_IF(!inst);
1168 INC_SIZE(size);
1169 *inst++ = GROUP_F3;
1170#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1171 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
1172#endif
1173 inst[0] = GROUP_0F;
1174 inst[1] = 0x1e;
1175#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1176 inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);
1177#else
1178 inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);
1179#endif
1180 return SLJIT_SUCCESS;
1181}
1182
1183static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
1184{
1185 sljit_u8 *inst;
1186 sljit_s32 size;
1187
1188#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1189 size = 5;
1190#else
1191 size = 4;
1192#endif
1193
1194 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1195 FAIL_IF(!inst);
1196 INC_SIZE(size);
1197 *inst++ = GROUP_F3;
1198#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1199 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
1200#endif
1201 inst[0] = GROUP_0F;
1202 inst[1] = 0xae;
1203 inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
1204 return SLJIT_SUCCESS;
1205}
1206
1207#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1208
1209static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
1210{
1211#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1212 return _get_ssp() != 0;
1213#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
1214 return 0;
1215#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1216}
1217
1218static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
1219 sljit_s32 src, sljit_sw srcw)
1220{
1221#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1222 sljit_u8 *inst, *jz_after_cmp_inst;
1223 sljit_uw size_jz_after_cmp_inst;
1224
1225 sljit_uw size_before_rdssp_inst = compiler->size;
1226
1227 /* Generate "RDSSP TMP_REG1". */
1228 FAIL_IF(emit_rdssp(compiler, TMP_REG1));
1229
1230 /* Load return address on shadow stack into TMP_REG1. */
1231 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
1232
1233 /* Compare return address against TMP_REG1. */
1234 FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));
1235
1236 /* Generate JZ to skip shadow stack adjustment when shadow
1237 stack matches normal stack. */
1238 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1239 FAIL_IF(!inst);
1240 INC_SIZE(2);
1241 *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
1242 size_jz_after_cmp_inst = compiler->size;
1243 jz_after_cmp_inst = inst;
1244
1245#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1246 /* REX_W is not necessary. */
1247 compiler->mode32 = 1;
1248#endif
1249 /* Load 1 into TMP_REG1. */
1250 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
1251
1252 /* Generate "INCSSP TMP_REG1". */
1253 FAIL_IF(emit_incssp(compiler, TMP_REG1));
1254
1255 /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
1256 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1257 FAIL_IF(!inst);
1258 INC_SIZE(2);
1259 inst[0] = JMP_i8;
1260 inst[1] = size_before_rdssp_inst - compiler->size;
1261
1262 *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
1263#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
1264 SLJIT_UNUSED_ARG(compiler);
1265 SLJIT_UNUSED_ARG(src);
1266 SLJIT_UNUSED_ARG(srcw);
1267#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1268 return SLJIT_SUCCESS;
1269}
1270
1271#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1272#include "sljitNativeX86_32.c"
1273#else
1274#include "sljitNativeX86_64.c"
1275#endif
1276
1277static sljit_s32 emit_mov(struct sljit_compiler *compiler,
1278 sljit_s32 dst, sljit_sw dstw,
1279 sljit_s32 src, sljit_sw srcw)
1280{
1281 sljit_u8* inst;
1282
1283 if (FAST_IS_REG(src)) {
1284 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
1285 FAIL_IF(!inst);
1286 *inst = MOV_rm_r;
1287 return SLJIT_SUCCESS;
1288 }
1289
1290 if (src == SLJIT_IMM) {
1291 if (FAST_IS_REG(dst)) {
1292#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1293 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1294#else
1295 if (!compiler->mode32) {
1296 if (NOT_HALFWORD(srcw))
1297 return emit_load_imm64(compiler, dst, srcw);
1298 }
1299 else
1300 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
1301#endif
1302 }
1303#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1304 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
1305 /* Immediate-to-memory move. Only the SLJIT_MOV operation copies
1306 an immediate directly into memory, so TMP_REG1 can be used. */
1307 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
1308 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1309 FAIL_IF(!inst);
1310 *inst = MOV_rm_r;
1311 return SLJIT_SUCCESS;
1312 }
1313#endif
1314 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
1315 FAIL_IF(!inst);
1316 *inst = MOV_rm_i32;
1317 return SLJIT_SUCCESS;
1318 }
1319 if (FAST_IS_REG(dst)) {
1320 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
1321 FAIL_IF(!inst);
1322 *inst = MOV_r_rm;
1323 return SLJIT_SUCCESS;
1324 }
1325
1326 /* Memory-to-memory move. Only the SLJIT_MOV operation copies
1327 data from memory to memory, so TMP_REG1 can be used. */
1328 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
1329 FAIL_IF(!inst);
1330 *inst = MOV_r_rm;
1331 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1332 FAIL_IF(!inst);
1333 *inst = MOV_rm_r;
1334 return SLJIT_SUCCESS;
1335}
1336
1337static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
1338 sljit_s32 dst_reg,
1339 sljit_s32 src, sljit_sw srcw)
1340{
1341 sljit_u8* inst;
1342 sljit_uw size;
1343
1344 SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);
1345
1346 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1347 FAIL_IF(!inst);
1348 INC_SIZE(2);
1349 inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10);
1350
1351 size = compiler->size;
1352 EMIT_MOV(compiler, dst_reg, 0, src, srcw);
1353
1354 inst[1] = U8(compiler->size - size);
1355 return SLJIT_SUCCESS;
1356}
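/* Fallback when the CMOV instruction is unavailable: emit the inverted
   condition as a short jump over a plain mov. The "type ^ 0x1" works because
   sljit condition codes come in true/false pairs differing only in the
   lowest bit, and the rel8 displacement (inst[1]) is patched once the size
   of the mov is known. */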
1357
1358SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1359{
1360 sljit_u8 *inst;
1361#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1362 sljit_uw size;
1363#endif
1364
1365 CHECK_ERROR();
1366 CHECK(check_sljit_emit_op0(compiler, op));
1367
1368 switch (GET_OPCODE(op)) {
1369 case SLJIT_BREAKPOINT:
1370 return emit_byte(compiler, INT3);
1371 case SLJIT_NOP:
1372 return emit_byte(compiler, NOP);
1373 case SLJIT_LMUL_UW:
1374 case SLJIT_LMUL_SW:
1375 case SLJIT_DIVMOD_UW:
1376 case SLJIT_DIVMOD_SW:
1377 case SLJIT_DIV_UW:
1378 case SLJIT_DIV_SW:
1379#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1380#ifdef _WIN64
1381 SLJIT_ASSERT(
1382 reg_map[SLJIT_R0] == 0
1383 && reg_map[SLJIT_R1] == 2
1384 && reg_map[TMP_REG1] > 7);
1385#else
1386 SLJIT_ASSERT(
1387 reg_map[SLJIT_R0] == 0
1388 && reg_map[SLJIT_R1] < 7
1389 && reg_map[TMP_REG1] == 2);
1390#endif
1391 compiler->mode32 = op & SLJIT_32;
1392#endif
1393 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
1394
1395 op = GET_OPCODE(op);
1396 if ((op | 0x2) == SLJIT_DIV_UW) {
1397#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1398 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1399 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
1400#else
1401 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1402#endif
1403 FAIL_IF(!inst);
1404 *inst = XOR_r_rm;
1405 }
1406
1407 if ((op | 0x2) == SLJIT_DIV_SW) {
1408#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1409 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1410#endif
1411
1412#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1413 FAIL_IF(emit_byte(compiler, CDQ));
1414#else
1415 if (!compiler->mode32) {
1416 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1417 FAIL_IF(!inst);
1418 INC_SIZE(2);
1419 inst[0] = REX_W;
1420 inst[1] = CDQ;
1421 } else
1422 FAIL_IF(emit_byte(compiler, CDQ));
1423#endif
1424 }
1425
1426#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1427 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1428 FAIL_IF(!inst);
1429 INC_SIZE(2);
1430 inst[0] = GROUP_F7;
1431 inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
1432#else /* !SLJIT_CONFIG_X86_32 */
1433#ifdef _WIN64
1434 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
1435#else /* !_WIN64 */
1436 size = (!compiler->mode32) ? 3 : 2;
1437#endif /* _WIN64 */
1438 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1439 FAIL_IF(!inst);
1440 INC_SIZE(size);
1441#ifdef _WIN64
1442 if (!compiler->mode32)
1443 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
1444 else if (op >= SLJIT_DIVMOD_UW)
1445 *inst++ = REX_B;
1446 inst[0] = GROUP_F7;
1447 inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
1448#else /* !_WIN64 */
1449 if (!compiler->mode32)
1450 *inst++ = REX_W;
1451 inst[0] = GROUP_F7;
1452 inst[1] = MOD_REG | reg_map[SLJIT_R1];
1453#endif /* _WIN64 */
1454#endif /* SLJIT_CONFIG_X86_32 */
1455 switch (op) {
1456 case SLJIT_LMUL_UW:
1457 inst[1] |= MUL;
1458 break;
1459 case SLJIT_LMUL_SW:
1460 inst[1] |= IMUL;
1461 break;
1462 case SLJIT_DIVMOD_UW:
1463 case SLJIT_DIV_UW:
1464 inst[1] |= DIV;
1465 break;
1466 case SLJIT_DIVMOD_SW:
1467 case SLJIT_DIV_SW:
1468 inst[1] |= IDIV;
1469 break;
1470 }
1471#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
1472 if (op <= SLJIT_DIVMOD_SW)
1473 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1474#else
1475 if (op >= SLJIT_DIV_UW)
1476 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1477#endif
1478 break;
1479 case SLJIT_ENDBR:
1480 return emit_endbranch(compiler);
1481 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1482 return skip_frames_before_return(compiler);
1483 }
1484
1485 return SLJIT_SUCCESS;
1486}
1487
1488static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
1489 sljit_s32 dst, sljit_sw dstw,
1490 sljit_s32 src, sljit_sw srcw)
1491{
1492 sljit_u8* inst;
1493 sljit_s32 dst_r;
1494
1495#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1496 compiler->mode32 = 0;
1497#endif
1498
1499 if (src == SLJIT_IMM) {
1500 if (FAST_IS_REG(dst)) {
1501#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1502 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1503#else
1504 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1505 FAIL_IF(!inst);
1506 *inst = MOV_rm_i32;
1507 return SLJIT_SUCCESS;
1508#endif
1509 }
1510 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
1511 FAIL_IF(!inst);
1512 *inst = MOV_rm8_i8;
1513 return SLJIT_SUCCESS;
1514 }
1515
1516 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1517
1518 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
1519#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1520 if (reg_map[src] >= 4) {
1521 SLJIT_ASSERT(dst_r == TMP_REG1);
1522 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1523 } else
1524 dst_r = src;
1525#else
1526 dst_r = src;
1527#endif
1528 } else {
1529#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1530 if (FAST_IS_REG(src) && reg_map[src] >= 4) {
1531 /* Both src and dst are registers. */
1532 SLJIT_ASSERT(FAST_IS_REG(dst));
1533
1534 if (src == dst && !sign) {
1535 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
1536 FAIL_IF(!inst);
1537 *(inst + 1) |= AND;
1538 return SLJIT_SUCCESS;
1539 }
1540
1541 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1542 src = TMP_REG1;
1543 srcw = 0;
1544 }
1545#endif /* !SLJIT_CONFIG_X86_32 */
1546
1547 /* Here src is either a memory operand or a register with reg_map[src] < 4 on x86-32. */
1548 FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));
1549 }
1550
1551 if (dst & SLJIT_MEM) {
1552 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1553 FAIL_IF(!inst);
1554 *inst = MOV_rm8_r8;
1555 }
1556
1557 return SLJIT_SUCCESS;
1558}
1559
1560static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1561 sljit_s32 src, sljit_sw srcw)
1562{
1563 sljit_u8* inst;
1564
1565#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1566 compiler->mode32 = 1;
1567#endif
1568
1569 inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1570 FAIL_IF(!inst);
1571 inst[0] = GROUP_0F;
1572 inst[1] = PREFETCH;
1573
1574 if (op == SLJIT_PREFETCH_L1)
1575 inst[2] |= (1 << 3);
1576 else if (op == SLJIT_PREFETCH_L2)
1577 inst[2] |= (2 << 3);
1578 else if (op == SLJIT_PREFETCH_L3)
1579 inst[2] |= (3 << 3);
1580
1581 return SLJIT_SUCCESS;
1582}
1583
1584static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1585 sljit_s32 dst, sljit_sw dstw,
1586 sljit_s32 src, sljit_sw srcw)
1587{
1588 sljit_u8* inst;
1589 sljit_s32 dst_r;
1590
1591#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1592 compiler->mode32 = 0;
1593#endif
1594
1595 if (src == SLJIT_IMM) {
1596 if (FAST_IS_REG(dst)) {
1597#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1598 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1599#else
1600 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1601 FAIL_IF(!inst);
1602 *inst = MOV_rm_i32;
1603 return SLJIT_SUCCESS;
1604#endif
1605 }
1606 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1607 FAIL_IF(!inst);
1608 *inst = MOV_rm_i32;
1609 return SLJIT_SUCCESS;
1610 }
1611
1612 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1613
1614 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1615 dst_r = src;
1616 else
1617 FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));
1618
1619 if (dst & SLJIT_MEM) {
1620 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1621 FAIL_IF(!inst);
1622 *inst = MOV_rm_r;
1623 }
1624
1625 return SLJIT_SUCCESS;
1626}
1627
1628static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1629 sljit_s32 dst, sljit_sw dstw,
1630 sljit_s32 src, sljit_sw srcw)
1631{
1632 sljit_u8* inst;
1633
1634 if (dst == src && dstw == srcw) {
1635 /* Same input and output */
1636 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1637 FAIL_IF(!inst);
1638 inst[0] = GROUP_F7;
1639 inst[1] |= opcode;
1640 return SLJIT_SUCCESS;
1641 }
1642
1643 if (FAST_IS_REG(dst)) {
1644 EMIT_MOV(compiler, dst, 0, src, srcw);
1645 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1646 FAIL_IF(!inst);
1647 inst[0] = GROUP_F7;
1648 inst[1] |= opcode;
1649 return SLJIT_SUCCESS;
1650 }
1651
1652 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1653 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1654 FAIL_IF(!inst);
1655 inst[0] = GROUP_F7;
1656 inst[1] |= opcode;
1657 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1658 return SLJIT_SUCCESS;
1659}
1660
1661#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1662static const sljit_sw emit_clz_arg = 32 + 31;
1663static const sljit_sw emit_ctz_arg = 32;
1664#endif
1665
1666static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
1667 sljit_s32 dst, sljit_sw dstw,
1668 sljit_s32 src, sljit_sw srcw)
1669{
1670 sljit_u8* inst;
1671 sljit_s32 dst_r;
1672 sljit_sw max;
1673
1674 SLJIT_ASSERT(cpu_feature_list != 0);
1675
1676 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1677
1678 if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
1679 FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));
1680
1681 if (dst & SLJIT_MEM)
1682 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1683 return SLJIT_SUCCESS;
1684 }
1685
1686 FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw));
1687
1688#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1689 max = is_clz ? (32 + 31) : 32;
1690
1691 if (cpu_feature_list & CPU_FEATURE_CMOV) {
1692 if (dst_r != TMP_REG1) {
1693 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
1694 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1695 }
1696 else
1697 inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);
1698
1699 FAIL_IF(!inst);
1700 inst[0] = GROUP_0F;
1701 inst[1] = CMOVE_r_rm;
1702 }
1703 else
1704 FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
1705
1706 if (is_clz) {
1707 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1708 FAIL_IF(!inst);
1709 *(inst + 1) |= XOR;
1710 }
1711#else
1712 if (is_clz)
1713 max = compiler->mode32 ? (32 + 31) : (64 + 63);
1714 else
1715 max = compiler->mode32 ? 32 : 64;
1716
1717 if (cpu_feature_list & CPU_FEATURE_CMOV) {
1718 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
1719 FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));
1720 } else
1721 FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
1722
1723 if (is_clz) {
1724 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
1725 FAIL_IF(!inst);
1726 *(inst + 1) |= XOR;
1727 }
1728#endif
1729
1730 if (dst & SLJIT_MEM)
1731 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1732 return SLJIT_SUCCESS;
1733}
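/* Why the XOR at the end: BSR yields the index of the highest set bit, so
   for a 32-bit value clz(x) = 31 - bsr(x), and since 0 <= bsr(x) <= 31 the
   subtraction can be done as bsr(x) ^ 31. The sentinel loaded through CMOV
   handles the zero input (BSR/BSF leave the destination undefined but set
   ZF): e.g. the 32-bit clz path uses 32 + 31, and (32 + 31) ^ 31 == 32. */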
1734
1735static sljit_s32 emit_bswap(struct sljit_compiler *compiler,
1736 sljit_s32 op,
1737 sljit_s32 dst, sljit_sw dstw,
1738 sljit_s32 src, sljit_sw srcw)
1739{
1740 sljit_u8 *inst;
1741 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1742 sljit_uw size;
1743#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1744 sljit_u8 rex = 0;
1745#else /* !SLJIT_CONFIG_X86_64 */
1746 sljit_s32 dst_is_ereg = op & SLJIT_32;
1747#endif /* SLJIT_CONFIG_X86_64 */
1748
1749#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1750 if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)
1751 compiler->mode32 = 1;
1752#else /* !SLJIT_CONFIG_X86_64 */
1753 op &= ~SLJIT_32;
1754#endif /* SLJIT_CONFIG_X86_64 */
1755
1756 if (src != dst_r) {
1757 /* Only the lower 16 bits are read for eregs. */
1758 if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1759 FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));
1760 else
1761 EMIT_MOV(compiler, dst_r, 0, src, srcw);
1762 }
1763
1764 size = 2;
1765#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1766 if (!compiler->mode32)
1767 rex = REX_W;
1768
1769 if (reg_map[dst_r] >= 8)
1770 rex |= REX_B;
1771
1772 if (rex != 0)
1773 size++;
1774#endif /* SLJIT_CONFIG_X86_64 */
1775
1776 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1777 FAIL_IF(!inst);
1778 INC_SIZE(size);
1779
1780#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1781 if (rex != 0)
1782 *inst++ = rex;
1783
1784 inst[0] = GROUP_0F;
1785 inst[1] = BSWAP_r | reg_lmap[dst_r];
1786#else /* !SLJIT_CONFIG_X86_64 */
1787 inst[0] = GROUP_0F;
1788 inst[1] = BSWAP_r | reg_map[dst_r];
1789#endif /* SLJIT_CONFIG_X86_64 */
1790
1791 if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
1792#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1793 size = compiler->mode32 ? 16 : 48;
1794#else /* !SLJIT_CONFIG_X86_64 */
1795 size = 16;
1796#endif /* SLJIT_CONFIG_X86_64 */
1797
1798 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
1799 FAIL_IF(!inst);
1800 if (op == SLJIT_REV_U16)
1801 inst[1] |= SHR;
1802 else
1803 inst[1] |= SAR;
1804 }
1805
1806 if (dst & SLJIT_MEM) {
1807#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1808 if (dst_is_ereg)
1809 op = SLJIT_REV;
1810#endif /* SLJIT_CONFIG_X86_32 */
1811 if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1812 return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);
1813
1814 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1815 }
1816
1817#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1818 if (op == SLJIT_REV_S32) {
1819 compiler->mode32 = 0;
1820 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1821 FAIL_IF(!inst);
1822 *inst = MOVSXD_r_rm;
1823 }
1824#endif /* SLJIT_CONFIG_X86_64 */
1825
1826 return SLJIT_SUCCESS;
1827}
1828
1829SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1830 sljit_s32 dst, sljit_sw dstw,
1831 sljit_s32 src, sljit_sw srcw)
1832{
1833#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1834 sljit_s32 dst_is_ereg = 0;
1835#else /* !SLJIT_CONFIG_X86_32 */
1836 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1837#endif /* SLJIT_CONFIG_X86_32 */
1838
1839 CHECK_ERROR();
1840 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1841 ADJUST_LOCAL_OFFSET(dst, dstw);
1842 ADJUST_LOCAL_OFFSET(src, srcw);
1843
1844 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1845 CHECK_EXTRA_REGS(src, srcw, (void)0);
1846#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1847 compiler->mode32 = op_flags & SLJIT_32;
1848#endif /* SLJIT_CONFIG_X86_64 */
1849
1850 op = GET_OPCODE(op);
1851
1852 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1853#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1854 compiler->mode32 = 0;
1855#endif /* SLJIT_CONFIG_X86_64 */
1856
1857 if (FAST_IS_REG(src) && src == dst) {
1858 if (!TYPE_CAST_NEEDED(op))
1859 return SLJIT_SUCCESS;
1860 }
1861
1862#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1863 if (op_flags & SLJIT_32) {
1864 if (src & SLJIT_MEM) {
1865 if (op == SLJIT_MOV_S32)
1866 op = SLJIT_MOV_U32;
1867 }
1868 else if (src == SLJIT_IMM) {
1869 if (op == SLJIT_MOV_U32)
1870 op = SLJIT_MOV_S32;
1871 }
1872 }
1873#endif /* SLJIT_CONFIG_X86_64 */
1874
1875 if (src == SLJIT_IMM) {
1876 switch (op) {
1877 case SLJIT_MOV_U8:
1878 srcw = (sljit_u8)srcw;
1879 break;
1880 case SLJIT_MOV_S8:
1881 srcw = (sljit_s8)srcw;
1882 break;
1883 case SLJIT_MOV_U16:
1884 srcw = (sljit_u16)srcw;
1885 break;
1886 case SLJIT_MOV_S16:
1887 srcw = (sljit_s16)srcw;
1888 break;
1889#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1890 case SLJIT_MOV_U32:
1891 srcw = (sljit_u32)srcw;
1892 break;
1893 case SLJIT_MOV_S32:
1894 srcw = (sljit_s32)srcw;
1895 break;
1896#endif /* SLJIT_CONFIG_X86_64 */
1897 }
1898#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1899 if (SLJIT_UNLIKELY(dst_is_ereg))
1900 return emit_mov(compiler, dst, dstw, src, srcw);
1901#endif /* SLJIT_CONFIG_X86_32 */
1902 }
1903
1904#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1905 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1906 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1907 dst = TMP_REG1;
1908 }
1909#endif /* SLJIT_CONFIG_X86_32 */
1910
1911 switch (op) {
1912 case SLJIT_MOV:
1913 case SLJIT_MOV_P:
1914#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1915 case SLJIT_MOV_U32:
1916 case SLJIT_MOV_S32:
1917 case SLJIT_MOV32:
1918#endif /* SLJIT_CONFIG_X86_32 */
1919 EMIT_MOV(compiler, dst, dstw, src, srcw);
1920 break;
1921 case SLJIT_MOV_U8:
1922 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1923 break;
1924 case SLJIT_MOV_S8:
1925 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1926 break;
1927 case SLJIT_MOV_U16:
1928 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1929 break;
1930 case SLJIT_MOV_S16:
1931 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1932 break;
1933#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1934 case SLJIT_MOV_U32:
1935 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1936 break;
1937 case SLJIT_MOV_S32:
1938 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1939 break;
1940 case SLJIT_MOV32:
1941 compiler->mode32 = 1;
1942 EMIT_MOV(compiler, dst, dstw, src, srcw);
1943 compiler->mode32 = 0;
1944 break;
1945#endif /* SLJIT_CONFIG_X86_64 */
1946 }
1947
1948#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1949 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1950 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1951#endif /* SLJIT_CONFIG_X86_32 */
1952 return SLJIT_SUCCESS;
1953 }
1954
1955 switch (op) {
1956 case SLJIT_CLZ:
1957 case SLJIT_CTZ:
1958 return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
1959 case SLJIT_REV:
1960 case SLJIT_REV_U16:
1961 case SLJIT_REV_S16:
1962 case SLJIT_REV_U32:
1963 case SLJIT_REV_S32:
1964#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1965 if (dst_is_ereg)
1966 op |= SLJIT_32;
1967#endif /* SLJIT_CONFIG_X86_32 */
1968 return emit_bswap(compiler, op, dst, dstw, src, srcw);
1969 }
1970
1971 return SLJIT_SUCCESS;
1972}
1973
1974static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1975 sljit_u32 op_types,
1976 sljit_s32 dst, sljit_sw dstw,
1977 sljit_s32 src1, sljit_sw src1w,
1978 sljit_s32 src2, sljit_sw src2w)
1979{
1980 sljit_u8* inst;
1981 sljit_u8 op_eax_imm = U8(op_types >> 24);
1982 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
1983 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
1984 sljit_u8 op_imm = U8(op_types & 0xff);
1985
1986 if (dst == src1 && dstw == src1w) {
1987 if (src2 == SLJIT_IMM) {
1988#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1989 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1990#else
1991 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1992#endif
1993 BINARY_EAX_IMM(op_eax_imm, src2w);
1994 }
1995 else {
1996 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1997 }
1998 }
1999 else if (FAST_IS_REG(dst)) {
2000 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2001 FAIL_IF(!inst);
2002 *inst = op_rm;
2003 }
2004 else if (FAST_IS_REG(src2)) {
2005 /* Special exception for sljit_emit_op_flags. */
2006 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2007 FAIL_IF(!inst);
2008 *inst = op_mr;
2009 }
2010 else {
2011 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2012 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2013 FAIL_IF(!inst);
2014 *inst = op_mr;
2015 }
2016 return SLJIT_SUCCESS;
2017 }
2018
2019 /* Only for cumulative operations. */
2020 if (dst == src2 && dstw == src2w) {
2021 if (src1 == SLJIT_IMM) {
2022#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2023 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2024#else
2025 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
2026#endif
2027 BINARY_EAX_IMM(op_eax_imm, src1w);
2028 }
2029 else {
2030 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
2031 }
2032 }
2033 else if (FAST_IS_REG(dst)) {
2034 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
2035 FAIL_IF(!inst);
2036 *inst = op_rm;
2037 }
2038 else if (FAST_IS_REG(src1)) {
2039 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
2040 FAIL_IF(!inst);
2041 *inst = op_mr;
2042 }
2043 else {
2044 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2045 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2046 FAIL_IF(!inst);
2047 *inst = op_mr;
2048 }
2049 return SLJIT_SUCCESS;
2050 }
2051
2052 /* General version. */
2053 if (FAST_IS_REG(dst)) {
2054 EMIT_MOV(compiler, dst, 0, src1, src1w);
2055 if (src2 == SLJIT_IMM) {
2056 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2057 }
2058 else {
2059 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2060 FAIL_IF(!inst);
2061 *inst = op_rm;
2062 }
2063 }
2064 else {
2065 /* This version requires fewer memory writes. */
2066 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2067 if (src2 == SLJIT_IMM) {
2068 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2069 }
2070 else {
2071 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2072 FAIL_IF(!inst);
2073 *inst = op_rm;
2074 }
2075 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2076 }
2077
2078 return SLJIT_SUCCESS;
2079}
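
A note on the op_types parameter: emit_cum_binary() receives its four one-byte x86 opcode variants packed into a single 32-bit word and unpacks them with the shifts at the top of the function. A minimal standalone sketch of that packing scheme (the PACK_OP_TYPES macro and the sample ADD-family opcode values are illustrative stand-ins, not the file's own BINARY_OPCODE macro):

#include <stdint.h>
#include <stdio.h>

/* Pack the eax_imm, rm, mr and imm opcode bytes, highest byte first. */
#define PACK_OP_TYPES(eax_imm, rm, mr, imm) \
    (((uint32_t)(eax_imm) << 24) | ((uint32_t)(rm) << 16) | \
     ((uint32_t)(mr) << 8) | (uint32_t)(imm))

int main(void)
{
    /* 0x05 = ADD EAX,imm32; 0x03 = ADD r,r/m; 0x01 = ADD r/m,r. */
    uint32_t op_types = PACK_OP_TYPES(0x05, 0x03, 0x01, 0x00);
    printf("eax_imm=%02x rm=%02x mr=%02x imm=%02x\n",
        (unsigned)(op_types >> 24), (unsigned)((op_types >> 16) & 0xff),
        (unsigned)((op_types >> 8) & 0xff), (unsigned)(op_types & 0xff));
    return 0;
}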
2080
2081static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
2082 sljit_u32 op_types,
2083 sljit_s32 dst, sljit_sw dstw,
2084 sljit_s32 src1, sljit_sw src1w,
2085 sljit_s32 src2, sljit_sw src2w)
2086{
2087 sljit_u8* inst;
2088 sljit_u8 op_eax_imm = U8(op_types >> 24);
2089 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
2090 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
2091 sljit_u8 op_imm = U8(op_types & 0xff);
2092
2093 if (dst == src1 && dstw == src1w) {
2094 if (src2 == SLJIT_IMM) {
2095#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2096 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2097#else
2098 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
2099#endif
2100 BINARY_EAX_IMM(op_eax_imm, src2w);
2101 }
2102 else {
2103 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
2104 }
2105 }
2106 else if (FAST_IS_REG(dst)) {
2107 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2108 FAIL_IF(!inst);
2109 *inst = op_rm;
2110 }
2111 else if (FAST_IS_REG(src2)) {
2112 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2113 FAIL_IF(!inst);
2114 *inst = op_mr;
2115 }
2116 else {
2117 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2118 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2119 FAIL_IF(!inst);
2120 *inst = op_mr;
2121 }
2122 return SLJIT_SUCCESS;
2123 }
2124
2125 /* General version. */
2126 if (FAST_IS_REG(dst) && dst != src2) {
2127 EMIT_MOV(compiler, dst, 0, src1, src1w);
2128 if (src2 == SLJIT_IMM) {
2129 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2130 }
2131 else {
2132 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2133 FAIL_IF(!inst);
2134 *inst = op_rm;
2135 }
2136 }
2137 else {
2138 /* This version requires fewer memory writes. */
2139 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2140 if (src2 == SLJIT_IMM) {
2141 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2142 }
2143 else {
2144 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2145 FAIL_IF(!inst);
2146 *inst = op_rm;
2147 }
2148 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2149 }
2150
2151 return SLJIT_SUCCESS;
2152}
2153
2154static sljit_s32 emit_mul(struct sljit_compiler *compiler,
2155 sljit_s32 dst, sljit_sw dstw,
2156 sljit_s32 src1, sljit_sw src1w,
2157 sljit_s32 src2, sljit_sw src2w)
2158{
2159 sljit_u8* inst;
2160 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2161
2162 /* Register destination. */
2163 if (dst_r == src1 && src2 != SLJIT_IMM) {
2164 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2165 } else if (dst_r == src2 && src1 != SLJIT_IMM) {
2166 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
2167 } else if (src1 == SLJIT_IMM) {
2168 if (src2 == SLJIT_IMM) {
2169 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
2170 src2 = dst_r;
2171 src2w = 0;
2172 }
2173
2174 if (src1w <= 127 && src1w >= -128) {
2175 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2176 FAIL_IF(!inst);
2177 *inst = IMUL_r_rm_i8;
2178
2179 FAIL_IF(emit_byte(compiler, U8(src1w)));
2180 }
2181#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2182 else {
2183 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2184 FAIL_IF(!inst);
2185 *inst = IMUL_r_rm_i32;
2186 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2187 FAIL_IF(!inst);
2188 INC_SIZE(4);
2189 sljit_unaligned_store_sw(inst, src1w);
2190 }
2191#else
2192 else if (IS_HALFWORD(src1w)) {
2193 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2194 FAIL_IF(!inst);
2195 *inst = IMUL_r_rm_i32;
2196 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2197 FAIL_IF(!inst);
2198 INC_SIZE(4);
2199 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
2200 }
2201 else {
2202 if (dst_r != src2)
2203 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
2204 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
2205 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2206 }
2207#endif
2208 }
2209 else if (src2 == SLJIT_IMM) {
2210 /* Note: src1 is NOT immediate. */
2211
2212 if (src2w <= 127 && src2w >= -128) {
2213 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2214 FAIL_IF(!inst);
2215 *inst = IMUL_r_rm_i8;
2216
2217 FAIL_IF(emit_byte(compiler, U8(src2w)));
2218 }
2219#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2220 else {
2221 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2222 FAIL_IF(!inst);
2223 *inst = IMUL_r_rm_i32;
2224
2225 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2226 FAIL_IF(!inst);
2227 INC_SIZE(4);
2228 sljit_unaligned_store_sw(inst, src2w);
2229 }
2230#else
2231 else if (IS_HALFWORD(src2w)) {
2232 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2233 FAIL_IF(!inst);
2234 *inst = IMUL_r_rm_i32;
2235
2236 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2237 FAIL_IF(!inst);
2238 INC_SIZE(4);
2239 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
2240 } else {
2241 if (dst_r != src1)
2242 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2243 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2244 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2245 }
2246#endif
2247 } else {
2248 /* Neither argument is immediate. */
2249 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
2250 dst_r = TMP_REG1;
2251 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2252 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2253 }
2254
2255 if (dst & SLJIT_MEM)
2256 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2257
2258 return SLJIT_SUCCESS;
2259}
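
A usage sketch (assuming an initialized struct sljit_compiler *compiler): a multiplication by a small immediate reaches emit_mul() through sljit_emit_op2() and takes the IMUL r, r/m, imm8 path above, since the constant fits in a signed byte.

sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 10);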
2260
2261static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
2262 sljit_s32 dst, sljit_sw dstw,
2263 sljit_s32 src1, sljit_sw src1w,
2264 sljit_s32 src2, sljit_sw src2w)
2265{
2266 sljit_u8* inst;
2267 sljit_s32 dst_r, done = 0;
2268
2269 /* These cases are better left to the normal code path. */
2270 if (dst == src1 && dstw == src1w)
2271 return SLJIT_ERR_UNSUPPORTED;
2272 if (dst == src2 && dstw == src2w)
2273 return SLJIT_ERR_UNSUPPORTED;
2274
2275 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2276
2277 if (FAST_IS_REG(src1)) {
2278 if (FAST_IS_REG(src2)) {
2279 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
2280 FAIL_IF(!inst);
2281 *inst = LEA_r_m;
2282 done = 1;
2283 }
2284#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2285 if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
2286 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
2287#else
2288 if (src2 == SLJIT_IMM) {
2289 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
2290#endif
2291 FAIL_IF(!inst);
2292 *inst = LEA_r_m;
2293 done = 1;
2294 }
2295 }
2296 else if (FAST_IS_REG(src2)) {
2297#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2298 if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
2299 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
2300#else
2301 if (src1 == SLJIT_IMM) {
2302 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
2303#endif
2304 FAIL_IF(!inst);
2305 *inst = LEA_r_m;
2306 done = 1;
2307 }
2308 }
2309
2310 if (done) {
2311 if (dst_r == TMP_REG1)
2312 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2313 return SLJIT_SUCCESS;
2314 }
2315 return SLJIT_ERR_UNSUPPORTED;
2316}
2317
2318static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
2319 sljit_s32 src1, sljit_sw src1w,
2320 sljit_s32 src2, sljit_sw src2w)
2321{
2322 sljit_u8* inst;
2323
2324#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2325 if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2326#else
2327 if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2328#endif
2329 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
2330 return SLJIT_SUCCESS;
2331 }
2332
2333 if (FAST_IS_REG(src1)) {
2334 if (src2 == SLJIT_IMM) {
2335 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
2336 }
2337 else {
2338 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2339 FAIL_IF(!inst);
2340 *inst = CMP_r_rm;
2341 }
2342 return SLJIT_SUCCESS;
2343 }
2344
2345 if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
2346 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2347 FAIL_IF(!inst);
2348 *inst = CMP_rm_r;
2349 return SLJIT_SUCCESS;
2350 }
2351
2352 if (src2 == SLJIT_IMM) {
2353 if (src1 == SLJIT_IMM) {
2354 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2355 src1 = TMP_REG1;
2356 src1w = 0;
2357 }
2358 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2359 }
2360 else {
2361 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2362 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2363 FAIL_IF(!inst);
2364 *inst = CMP_r_rm;
2365 }
2366 return SLJIT_SUCCESS;
2367}
2368
2369static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2370 sljit_s32 src1, sljit_sw src1w,
2371 sljit_s32 src2, sljit_sw src2w)
2372{
2373 sljit_u8* inst;
2374
2375#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2376 if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2377#else
2378 if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2379#endif
2380 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2381 return SLJIT_SUCCESS;
2382 }
2383
2384#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2385 if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2386#else
2387 if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
2388#endif
2389 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2390 return SLJIT_SUCCESS;
2391 }
2392
2393 if (src1 != SLJIT_IMM) {
2394 if (src2 == SLJIT_IMM) {
2395#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2396 if (IS_HALFWORD(src2w) || compiler->mode32) {
2397 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2398 FAIL_IF(!inst);
2399 *inst = GROUP_F7;
2400 } else {
2401 FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w));
2402 inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w);
2403 FAIL_IF(!inst);
2404 *inst = TEST_rm_r;
2405 }
2406#else
2407 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2408 FAIL_IF(!inst);
2409 *inst = GROUP_F7;
2410#endif
2411 return SLJIT_SUCCESS;
2412 }
2413 else if (FAST_IS_REG(src1)) {
2414 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2415 FAIL_IF(!inst);
2416 *inst = TEST_rm_r;
2417 return SLJIT_SUCCESS;
2418 }
2419 }
2420
2421 if (src2 != SLJIT_IMM) {
2422 if (src1 == SLJIT_IMM) {
2423#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2424 if (IS_HALFWORD(src1w) || compiler->mode32) {
2425 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2426 FAIL_IF(!inst);
2427 *inst = GROUP_F7;
2428 }
2429 else {
2430 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2431 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2432 FAIL_IF(!inst);
2433 *inst = TEST_rm_r;
2434 }
2435#else
2436 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2437 FAIL_IF(!inst);
2438 *inst = GROUP_F7;
2439#endif
2440 return SLJIT_SUCCESS;
2441 }
2442 else if (FAST_IS_REG(src2)) {
2443 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2444 FAIL_IF(!inst);
2445 *inst = TEST_rm_r;
2446 return SLJIT_SUCCESS;
2447 }
2448 }
2449
2450 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2451 if (src2 == SLJIT_IMM) {
2452#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2453 if (IS_HALFWORD(src2w) || compiler->mode32) {
2454 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2455 FAIL_IF(!inst);
2456 *inst = GROUP_F7;
2457 }
2458 else {
2459 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2460 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2461 FAIL_IF(!inst);
2462 *inst = TEST_rm_r;
2463 }
2464#else
2465 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2466 FAIL_IF(!inst);
2467 *inst = GROUP_F7;
2468#endif
2469 }
2470 else {
2471 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2472 FAIL_IF(!inst);
2473 *inst = TEST_rm_r;
2474 }
2475 return SLJIT_SUCCESS;
2476}
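
A usage sketch (assuming an initialized compiler): a flag-only AND issued through sljit_emit_op2u() is lowered to the TEST encodings emitted by emit_test_binary() above, so no destination register is written.

sljit_emit_op2u(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_IMM, 0xff);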
2477
2478static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2479 sljit_u8 mode,
2480 sljit_s32 dst, sljit_sw dstw,
2481 sljit_s32 src1, sljit_sw src1w,
2482 sljit_s32 src2, sljit_sw src2w)
2483{
2484#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2485 sljit_s32 mode32;
2486#endif
2487 sljit_u8* inst;
2488
2489 if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
2490 if (dst == src1 && dstw == src1w) {
2491 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2492 FAIL_IF(!inst);
2493 inst[1] |= mode;
2494 return SLJIT_SUCCESS;
2495 }
2496 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2497 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2498 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2499 FAIL_IF(!inst);
2500 inst[1] |= mode;
2501 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2502 return SLJIT_SUCCESS;
2503 }
2504 if (FAST_IS_REG(dst)) {
2505 EMIT_MOV(compiler, dst, 0, src1, src1w);
2506 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2507 FAIL_IF(!inst);
2508 inst[1] |= mode;
2509 return SLJIT_SUCCESS;
2510 }
2511
2512 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2513 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2514 FAIL_IF(!inst);
2515 inst[1] |= mode;
2516 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2517 return SLJIT_SUCCESS;
2518 }
2519
2520 if (dst == SLJIT_PREF_SHIFT_REG) {
2521 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2522 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2523 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2524 FAIL_IF(!inst);
2525 inst[1] |= mode;
2526 return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2527 }
2528
2529 if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2530 if (src1 != dst)
2531 EMIT_MOV(compiler, dst, 0, src1, src1w);
2532#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2533 mode32 = compiler->mode32;
2534 compiler->mode32 = 0;
2535#endif
2536 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2537#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2538 compiler->mode32 = mode32;
2539#endif
2540 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2541 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2542 FAIL_IF(!inst);
2543 inst[1] |= mode;
2544#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2545 compiler->mode32 = 0;
2546#endif
2547 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2548#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2549 compiler->mode32 = mode32;
2550#endif
2551 return SLJIT_SUCCESS;
2552 }
2553
2554 /* This case is complex since ecx itself may be used for
2555 addressing, and this case must be supported as well. */
2556 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2557#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2558 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2559#else /* !SLJIT_CONFIG_X86_32 */
2560 mode32 = compiler->mode32;
2561 compiler->mode32 = 0;
2562 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2563 compiler->mode32 = mode32;
2564#endif /* SLJIT_CONFIG_X86_32 */
2565
2566 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2567 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2568 FAIL_IF(!inst);
2569 inst[1] |= mode;
2570
2571#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2572 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2573#else
2574 compiler->mode32 = 0;
2575 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2576 compiler->mode32 = mode32;
2577#endif /* SLJIT_CONFIG_X86_32 */
2578
2579 if (dst != TMP_REG1)
2580 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2581
2582 return SLJIT_SUCCESS;
2583}
2584
2585static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2586 sljit_u8 mode, sljit_s32 set_flags,
2587 sljit_s32 dst, sljit_sw dstw,
2588 sljit_s32 src1, sljit_sw src1w,
2589 sljit_s32 src2, sljit_sw src2w)
2590{
2591 /* The CPU does not set flags if the shift count is 0. */
2592 if (src2 == SLJIT_IMM) {
2593#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2594 src2w &= compiler->mode32 ? 0x1f : 0x3f;
2595#else /* !SLJIT_CONFIG_X86_64 */
2596 src2w &= 0x1f;
2597#endif /* SLJIT_CONFIG_X86_64 */
2598 if (src2w != 0)
2599 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2600
2601 if (!set_flags)
2602 return emit_mov(compiler, dst, dstw, src1, src1w);
2603 /* OR dst, src, 0 */
2604 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2605 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2606 }
2607
2608 if (!set_flags)
2609 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2610
2611 if (!FAST_IS_REG(dst))
2612 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2613
2614 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2615
2616 if (FAST_IS_REG(dst))
2617 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2618 return SLJIT_SUCCESS;
2619}
2620
2621SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2622 sljit_s32 dst, sljit_sw dstw,
2623 sljit_s32 src1, sljit_sw src1w,
2624 sljit_s32 src2, sljit_sw src2w)
2625{
2626 CHECK_ERROR();
2627 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2628 ADJUST_LOCAL_OFFSET(dst, dstw);
2629 ADJUST_LOCAL_OFFSET(src1, src1w);
2630 ADJUST_LOCAL_OFFSET(src2, src2w);
2631
2632 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2633 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2634 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2635#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2636 compiler->mode32 = op & SLJIT_32;
2637#endif
2638
2639 switch (GET_OPCODE(op)) {
2640 case SLJIT_ADD:
2641 if (!HAS_FLAGS(op)) {
2642 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2643 return compiler->error;
2644 }
2645 return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2646 dst, dstw, src1, src1w, src2, src2w);
2647 case SLJIT_ADDC:
2648 return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2649 dst, dstw, src1, src1w, src2, src2w);
2650 case SLJIT_SUB:
2651 if (src1 == SLJIT_IMM && src1w == 0)
2652 return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
2653
2654 if (!HAS_FLAGS(op)) {
2655 if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2656 return compiler->error;
2657 if (FAST_IS_REG(dst) && src2 == dst) {
2658 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2659 return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2660 }
2661 }
2662
2663 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2664 dst, dstw, src1, src1w, src2, src2w);
2665 case SLJIT_SUBC:
2666 return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2667 dst, dstw, src1, src1w, src2, src2w);
2668 case SLJIT_MUL:
2669 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2670 case SLJIT_AND:
2671 return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2672 dst, dstw, src1, src1w, src2, src2w);
2673 case SLJIT_OR:
2674 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2675 dst, dstw, src1, src1w, src2, src2w);
2676 case SLJIT_XOR:
2677 if (!HAS_FLAGS(op)) {
2678 if (src2 == SLJIT_IMM && src2w == -1)
2679 return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);
2680 if (src1 == SLJIT_IMM && src1w == -1)
2681 return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);
2682 }
2683
2684 return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2685 dst, dstw, src1, src1w, src2, src2w);
2686 case SLJIT_SHL:
2687 case SLJIT_MSHL:
2688 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2689 dst, dstw, src1, src1w, src2, src2w);
2690 case SLJIT_LSHR:
2691 case SLJIT_MLSHR:
2692 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2693 dst, dstw, src1, src1w, src2, src2w);
2694 case SLJIT_ASHR:
2695 case SLJIT_MASHR:
2696 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2697 dst, dstw, src1, src1w, src2, src2w);
2698 case SLJIT_ROTL:
2699 return emit_shift_with_flags(compiler, ROL, 0,
2700 dst, dstw, src1, src1w, src2, src2w);
2701 case SLJIT_ROTR:
2702 return emit_shift_with_flags(compiler, ROR, 0,
2703 dst, dstw, src1, src1w, src2, src2w);
2704 }
2705
2706 return SLJIT_SUCCESS;
2707}
2708
2709SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2710 sljit_s32 src1, sljit_sw src1w,
2711 sljit_s32 src2, sljit_sw src2w)
2712{
2713 sljit_s32 opcode = GET_OPCODE(op);
2714
2715 CHECK_ERROR();
2716 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2717
2718 if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2719 SLJIT_SKIP_CHECKS(compiler);
2720 return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2721 }
2722
2723 ADJUST_LOCAL_OFFSET(src1, src1w);
2724 ADJUST_LOCAL_OFFSET(src2, src2w);
2725
2726 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2727 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2728#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2729 compiler->mode32 = op & SLJIT_32;
2730#endif
2731
2732 if (opcode == SLJIT_SUB)
2733 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2734
2735 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2736}
2737
2738SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2739 sljit_s32 dst_reg,
2740 sljit_s32 src1, sljit_sw src1w,
2741 sljit_s32 src2, sljit_sw src2w)
2742{
2743 sljit_u8* inst;
2744 sljit_sw dstw = 0;
2745
2746 CHECK_ERROR();
2747 CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2748 ADJUST_LOCAL_OFFSET(src1, src1w);
2749 ADJUST_LOCAL_OFFSET(src2, src2w);
2750
2751 CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2752 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2753 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2754#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2755 compiler->mode32 = op & SLJIT_32;
2756#endif
2757
2758 switch (GET_OPCODE(op)) {
2759 case SLJIT_MULADD:
2760 FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w));
2761 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw);
2762 FAIL_IF(!inst);
2763 *inst = ADD_rm_r;
2764 return SLJIT_SUCCESS;
2765 }
2766
2767 return SLJIT_SUCCESS;
2768}
2769
2770SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2771 sljit_s32 dst_reg,
2772 sljit_s32 src1_reg,
2773 sljit_s32 src2_reg,
2774 sljit_s32 src3, sljit_sw src3w)
2775{
2776 sljit_s32 is_rotate, is_left, move_src1;
2777 sljit_u8* inst;
2778 sljit_sw src1w = 0;
2779 sljit_sw dstw = 0;
2780 /* The whole register must be saved even for 32 bit operations. */
2781 sljit_u8 restore_ecx = 0;
2782#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2783 sljit_sw src2w = 0;
2784 sljit_s32 restore_sp4 = 0;
2785#endif /* SLJIT_CONFIG_X86_32 */
2786
2787 CHECK_ERROR();
2788 CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2789 ADJUST_LOCAL_OFFSET(src3, src3w);
2790
2791 CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2792 CHECK_EXTRA_REGS(src3, src3w, (void)0);
2793
2794#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2795 compiler->mode32 = op & SLJIT_32;
2796#endif /* SLJIT_CONFIG_X86_64 */
2797
2798 if (src3 == SLJIT_IMM) {
2799#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2800 src3w &= 0x1f;
2801#else /* !SLJIT_CONFIG_X86_32 */
2802 src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;
2803#endif /* SLJIT_CONFIG_X86_32 */
2804
2805 if (src3w == 0)
2806 return SLJIT_SUCCESS;
2807 }
2808
2809 is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2810
2811 is_rotate = (src1_reg == src2_reg);
2812 CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);
2813 CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);
2814
2815 if (is_rotate)
2816 return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);
2817
2818#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2819 if (src2_reg & SLJIT_MEM) {
2820 EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
2821 src2_reg = TMP_REG1;
2822 }
2823#endif /* SLJIT_CONFIG_X86_32 */
2824
2825 if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
2826#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2827 EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2828 src1_reg = TMP_REG1;
2829 src1w = 0;
2830#else /* !SLJIT_CONFIG_X86_64 */
2831 if (src2_reg != TMP_REG1) {
2832 EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2833 src1_reg = TMP_REG1;
2834 src1w = 0;
2835 } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2836 restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2837 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2838 EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2839 src1_reg = restore_sp4;
2840 src1w = 0;
2841 } else {
2842 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2843 restore_sp4 = src1_reg;
2844 }
2845#endif /* SLJIT_CONFIG_X86_64 */
2846
2847 if (src3 != SLJIT_PREF_SHIFT_REG)
2848 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2849 } else {
2850 if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2851#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2852 compiler->mode32 = 0;
2853#endif /* SLJIT_CONFIG_X86_64 */
2854 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2855#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2856 compiler->mode32 = op & SLJIT_32;
2857#endif /* SLJIT_CONFIG_X86_64 */
2858 src2_reg = TMP_REG1;
2859 restore_ecx = 1;
2860 }
2861
2862 move_src1 = 0;
2863#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2864 if (dst_reg != src1_reg) {
2865 if (dst_reg != src3) {
2866 EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2867 src1_reg = dst_reg;
2868 src1w = 0;
2869 } else
2870 move_src1 = 1;
2871 }
2872#else /* !SLJIT_CONFIG_X86_64 */
2873 if (dst_reg & SLJIT_MEM) {
2874 if (src2_reg != TMP_REG1) {
2875 EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2876 src1_reg = TMP_REG1;
2877 src1w = 0;
2878 } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2879 restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2880 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2881 EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2882 src1_reg = restore_sp4;
2883 src1w = 0;
2884 } else {
2885 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2886 restore_sp4 = src1_reg;
2887 }
2888 } else if (dst_reg != src1_reg) {
2889 if (dst_reg != src3) {
2890 EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2891 src1_reg = dst_reg;
2892 src1w = 0;
2893 } else
2894 move_src1 = 1;
2895 }
2896#endif /* SLJIT_CONFIG_X86_64 */
2897
2898 if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2899 if (!restore_ecx) {
2900#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2901 compiler->mode32 = 0;
2902 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2903 compiler->mode32 = op & SLJIT_32;
2904 restore_ecx = 1;
2905#else /* !SLJIT_CONFIG_X86_64 */
2906 if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {
2907 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2908 restore_ecx = 1;
2909 } else {
2910 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2911 restore_ecx = 2;
2912 }
2913#endif /* SLJIT_CONFIG_X86_64 */
2914 }
2915 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2916 }
2917
2918 if (move_src1) {
2919 EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2920 src1_reg = dst_reg;
2921 src1w = 0;
2922 }
2923 }
2924
2925 inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);
2926 FAIL_IF(!inst);
2927 inst[0] = GROUP_0F;
2928
2929 if (src3 == SLJIT_IMM) {
2930 inst[1] = U8((is_left ? SHLD : SHRD) - 1);
2931
2932 /* Immediate argument is added separately. */
2933 FAIL_IF(emit_byte(compiler, U8(src3w)));
2934 } else
2935 inst[1] = U8(is_left ? SHLD : SHRD);
2936
2937#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2938 if (restore_ecx) {
2939 compiler->mode32 = 0;
2940 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2941 }
2942
2943 if (src1_reg != dst_reg) {
2944 compiler->mode32 = op & SLJIT_32;
2945 return emit_mov(compiler, dst_reg, dstw, src1_reg, 0);
2946 }
2947#else /* !SLJIT_CONFIG_X86_64 */
2948 if (restore_ecx)
2949 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0);
2950
2951 if (src1_reg != dst_reg)
2952 EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0);
2953
2954 if (restore_sp4)
2955 return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32));
2956#endif /* SLJIT_CONFIG_X86_32 */
2957
2958 return SLJIT_SUCCESS;
2959}
2960
2961SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2962 sljit_s32 src, sljit_sw srcw)
2963{
2964 CHECK_ERROR();
2965 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2966 ADJUST_LOCAL_OFFSET(src, srcw);
2967
2968 CHECK_EXTRA_REGS(src, srcw, (void)0);
2969
2970 switch (op) {
2971 case SLJIT_FAST_RETURN:
2972 return emit_fast_return(compiler, src, srcw);
2973 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2974 /* Don't adjust shadow stack if it isn't enabled. */
2975 if (!cpu_has_shadow_stack ())
2976 return SLJIT_SUCCESS;
2977 return adjust_shadow_stack(compiler, src, srcw);
2978 case SLJIT_PREFETCH_L1:
2979 case SLJIT_PREFETCH_L2:
2980 case SLJIT_PREFETCH_L3:
2981 case SLJIT_PREFETCH_ONCE:
2982 return emit_prefetch(compiler, op, src, srcw);
2983 }
2984
2985 return SLJIT_SUCCESS;
2986}
2987
2988SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2989 sljit_s32 dst, sljit_sw dstw)
2990{
2991 CHECK_ERROR();
2992 CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2993 ADJUST_LOCAL_OFFSET(dst, dstw);
2994
2995 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2996
2997 switch (op) {
2998 case SLJIT_FAST_ENTER:
2999 return emit_fast_enter(compiler, dst, dstw);
3000 case SLJIT_GET_RETURN_ADDRESS:
3001 return sljit_emit_get_return_address(compiler, dst, dstw);
3002 }
3003
3004 return SLJIT_SUCCESS;
3005}
3006
3007SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3008{
3009 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3010
3011 if (type == SLJIT_GP_REGISTER) {
3012#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3013 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
3014 return -1;
3015#endif /* SLJIT_CONFIG_X86_32 */
3016 return reg_map[reg];
3017 }
3018
3019 if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256)
3020 return -1;
3021
3022 return freg_map[reg];
3023}
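
A brief usage sketch (assuming the public sljit API headers are included): sljit_get_register_index() maps a virtual register to its hardware encoding and returns -1 when no machine register backs it.

sljit_s32 idx = sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R0);
if (idx < 0) {
    /* SLJIT_R0 has no machine-register mapping on this target. */
}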
3024
3025SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3026 void *instruction, sljit_u32 size)
3027{
3028 sljit_u8 *inst;
3029
3030 CHECK_ERROR();
3031 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3032
3033 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3034 FAIL_IF(!inst);
3035 INC_SIZE(size);
3036 SLJIT_MEMCPY(inst, instruction, size);
3037 return SLJIT_SUCCESS;
3038}
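
A minimal usage sketch for sljit_emit_op_custom() (assumes a valid struct sljit_compiler *compiler): the bytes are copied verbatim into the instruction stream, so any raw encoding can be injected, e.g. a single-byte x86 NOP.

sljit_u8 nop = 0x90; /* x86 NOP */
sljit_emit_op_custom(compiler, &nop, 1);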
3039
3040/* --------------------------------------------------------------------- */
3041/* Floating point operators */
3042/* --------------------------------------------------------------------- */
3043
3044/* Alignment padding (3 words) + 4 constants * 16 bytes each. */
3045static sljit_u32 sse2_data[3 + (4 * 4)];
3046static sljit_u32 *sse2_buffer;
3047
3048static void init_compiler(void)
3049{
3050 get_cpu_features();
3051
3052 /* Align to 16 bytes. */
3053 sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
3054
3055 /* Single precision constants (each constant is 16 bytes long). */
3056 sse2_buffer[0] = 0x80000000;
3057 sse2_buffer[4] = 0x7fffffff;
3058 /* Double precision constants (each constant is 16 bytes long). */
3059 sse2_buffer[8] = 0;
3060 sse2_buffer[9] = 0x80000000;
3061 sse2_buffer[12] = 0xffffffff;
3062 sse2_buffer[13] = 0x7fffffff;
3063}
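
init_compiler() rounds sse2_data up to the next 16-byte boundary with the add-then-mask idiom. A standalone sketch of the same computation:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uintptr_t addr = 0x1001;
    /* Add 15 and clear the low four bits: rounds up to a multiple of 16,
       leaving already-aligned addresses unchanged. */
    uintptr_t aligned = (addr + 15) & ~(uintptr_t)0xf;
    assert(aligned == 0x1010 && (aligned & 0xf) == 0);
    return 0;
}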
3064
3065static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
3066 sljit_uw op,
3067 sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3068{
3069 sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);
3070 FAIL_IF(!inst);
3071 inst[0] = GROUP_0F;
3072 inst[1] = op & 0xff;
3073 return SLJIT_SUCCESS;
3074}
3075
3076static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
3077 sljit_uw op,
3078 sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3079{
3080 sljit_u8 *inst;
3081
3082 SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));
3083
3084 inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
3085 FAIL_IF(!inst);
3086 inst[0] = GROUP_0F;
3087 inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);
3088 inst[2] = op & 0xff;
3089 return SLJIT_SUCCESS;
3090}
3091
3092static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
3093 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3094{
3095 return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
3096}
3097
3098static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
3099 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
3100{
3101 return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
3102}
3103
3104static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3105 sljit_s32 dst, sljit_sw dstw,
3106 sljit_s32 src, sljit_sw srcw)
3107{
3108 sljit_s32 dst_r;
3109
3110 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3111 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3112
3113#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3114 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3115 compiler->mode32 = 0;
3116#endif
3117
3118 FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));
3119
3120 if (dst & SLJIT_MEM)
3121 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3122 return SLJIT_SUCCESS;
3123}
3124
3125static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3126 sljit_s32 dst, sljit_sw dstw,
3127 sljit_s32 src, sljit_sw srcw)
3128{
3129 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3130
3131 CHECK_EXTRA_REGS(src, srcw, (void)0);
3132
3133#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3134 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3135 compiler->mode32 = 0;
3136#endif
3137
3138 if (src == SLJIT_IMM) {
3139#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3140 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3141 srcw = (sljit_s32)srcw;
3142#endif
3143 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
3144 src = TMP_REG1;
3145 srcw = 0;
3146 }
3147
3148 FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));
3149
3150#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3151 compiler->mode32 = 1;
3152#endif
3153 if (dst_r == TMP_FREG)
3154 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3155 return SLJIT_SUCCESS;
3156}
3157
3158static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3159 sljit_s32 src1, sljit_sw src1w,
3160 sljit_s32 src2, sljit_sw src2w)
3161{
3162 switch (GET_FLAG_TYPE(op)) {
3163 case SLJIT_ORDERED_EQUAL:
3164 /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
3165 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3166 FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));
3167
3168 /* EQ */
3169 FAIL_IF(emit_byte(compiler, 0));
3170
3171 src1 = TMP_FREG;
3172 src2 = TMP_FREG;
3173 src2w = 0;
3174 break;
3175
3176 case SLJIT_ORDERED_LESS:
3177 case SLJIT_UNORDERED_OR_GREATER:
3178 /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */
3179 if (!FAST_IS_REG(src2)) {
3180 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3181 src2 = TMP_FREG;
3182 }
3183
3184 return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
3185 }
3186
3187 if (!FAST_IS_REG(src1)) {
3188 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3189 src1 = TMP_FREG;
3190 }
3191
3192 return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w);
3193}
3194
3195SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3196 sljit_s32 dst, sljit_sw dstw,
3197 sljit_s32 src, sljit_sw srcw)
3198{
3199 sljit_s32 dst_r;
3200 sljit_u8 *inst;
3201
3202#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3203 compiler->mode32 = 1;
3204#endif
3205
3206 CHECK_ERROR();
3207 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3208
3209 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
3210 if (FAST_IS_REG(dst))
3211 return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
3212 if (FAST_IS_REG(src))
3213 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
3214 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3215 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3216 }
3217
3219 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3220 if (FAST_IS_REG(src)) {
3221 /* We overwrite the high bits of the source. From SLJIT's point of view,
3222 this is not an issue.
3223 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
3224 FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));
3225 } else {
3226 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
3227 src = TMP_FREG;
3228 }
3229
3230 FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));
3231 if (dst_r == TMP_FREG)
3232 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3233 return SLJIT_SUCCESS;
3234 }
3235
3236 if (FAST_IS_REG(dst)) {
3237 dst_r = (dst == src) ? TMP_FREG : dst;
3238
3239 if (src & SLJIT_MEM)
3240 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3241
3242 FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));
3243
3244 inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);
3245 inst[0] = GROUP_0F;
3246 /* Same as PSRLD_x / PSRLQ_x */
3247 inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;
3248
3249 if (GET_OPCODE(op) == SLJIT_ABS_F64) {
3250 inst[2] |= 2 << 3;
3251 FAIL_IF(emit_byte(compiler, 1));
3252 } else {
3253 inst[2] |= 6 << 3;
3254 FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));
3255 }
3256
3257 if (dst_r != TMP_FREG)
3258 dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;
3259 return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);
3260 }
3261
3262 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3263
3264 switch (GET_OPCODE(op)) {
3265 case SLJIT_NEG_F64:
3266 FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3267 break;
3268
3269 case SLJIT_ABS_F64:
3270 FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
3271 break;
3272 }
3273
3274 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3275}
3276
3277SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3278 sljit_s32 dst, sljit_sw dstw,
3279 sljit_s32 src1, sljit_sw src1w,
3280 sljit_s32 src2, sljit_sw src2w)
3281{
3282 sljit_s32 dst_r;
3283
3284 CHECK_ERROR();
3285 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3286 ADJUST_LOCAL_OFFSET(dst, dstw);
3287 ADJUST_LOCAL_OFFSET(src1, src1w);
3288 ADJUST_LOCAL_OFFSET(src2, src2w);
3289
3290#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3291 compiler->mode32 = 1;
3292#endif
3293
3294 if (FAST_IS_REG(dst)) {
3295 dst_r = dst;
3296 if (dst == src1)
3297 ; /* Do nothing here. */
3298 else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) {
3299 /* Swap arguments. */
3300 src2 = src1;
3301 src2w = src1w;
3302 } else if (dst != src2)
3303 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
3304 else {
3305 dst_r = TMP_FREG;
3306 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3307 }
3308 } else {
3309 dst_r = TMP_FREG;
3310 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3311 }
3312
3313 switch (GET_OPCODE(op)) {
3314 case SLJIT_ADD_F64:
3315 FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3316 break;
3317
3318 case SLJIT_SUB_F64:
3319 FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3320 break;
3321
3322 case SLJIT_MUL_F64:
3323 FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3324 break;
3325
3326 case SLJIT_DIV_F64:
3327 FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3328 break;
3329 }
3330
3331 if (dst_r != dst)
3332 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3333 return SLJIT_SUCCESS;
3334}
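
A usage sketch (assumes an initialized compiler): adding two double-precision registers; with dst == src1 the code above emits a single ADDSD without going through TMP_FREG.

sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR1, 0);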
3335
3336SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3337 sljit_s32 dst_freg,
3338 sljit_s32 src1, sljit_sw src1w,
3339 sljit_s32 src2, sljit_sw src2w)
3340{
3341 sljit_uw pref;
3342
3343 CHECK_ERROR();
3344 CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3345 ADJUST_LOCAL_OFFSET(src1, src1w);
3346 ADJUST_LOCAL_OFFSET(src2, src2w);
3347
3348#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3349 compiler->mode32 = 1;
3350#endif
3351
3352 if (dst_freg == src1) {
3353 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3354 pref = EX86_SELECT_66(op) | EX86_SSE2;
3355 FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));
3356 FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3357 return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);
3358 }
3359
3360 if (src1 & SLJIT_MEM) {
3361 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3362 src1 = TMP_FREG;
3363 src1w = 0;
3364 }
3365
3366 if (dst_freg != src2)
3367 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));
3368
3369 pref = EX86_SELECT_66(op) | EX86_SSE2;
3370 FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));
3371 FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3372 return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);
3373}
3374
3375/* --------------------------------------------------------------------- */
3376/* Conditional instructions */
3377/* --------------------------------------------------------------------- */
3378
3379SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3380{
3381 sljit_u8 *inst;
3382 struct sljit_label *label;
3383
3384 CHECK_ERROR_PTR();
3385 CHECK_PTR(check_sljit_emit_label(compiler));
3386
3387 if (compiler->last_label && compiler->last_label->size == compiler->size)
3388 return compiler->last_label;
3389
3390 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3391 PTR_FAIL_IF(!label);
3392 set_label(label, compiler);
3393
3394 inst = (sljit_u8*)ensure_buf(compiler, 1);
3395 PTR_FAIL_IF(!inst);
3396 inst[0] = SLJIT_INST_LABEL;
3397
3398 return label;
3399}
3400
3401SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3402{
3403 sljit_u8 *inst;
3404 struct sljit_jump *jump;
3405
3406 CHECK_ERROR_PTR();
3407 CHECK_PTR(check_sljit_emit_jump(compiler, type));
3408
3409 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3410 PTR_FAIL_IF_NULL(jump);
3411 set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
3412 type &= 0xff;
3413
3414 jump->addr = compiler->size;
3415 /* Worst case size. */
3416 compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE;
3417 inst = (sljit_u8*)ensure_buf(compiler, 1);
3418 PTR_FAIL_IF_NULL(inst);
3419
3420 inst[0] = SLJIT_INST_JUMP;
3421 return jump;
3422}
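
A usage sketch of the jump API (assumes an initialized compiler): a forward jump is reserved first, then bound to a label emitted later with sljit_set_label().

struct sljit_jump *jump = sljit_emit_jump(compiler, SLJIT_JUMP);
/* ... instructions to be skipped ... */
struct sljit_label *label = sljit_emit_label(compiler);
sljit_set_label(jump, label);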
3423
3424SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3425{
3426 sljit_u8 *inst;
3427 struct sljit_jump *jump;
3428
3429 CHECK_ERROR();
3430 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3431 ADJUST_LOCAL_OFFSET(src, srcw);
3432
3433 CHECK_EXTRA_REGS(src, srcw, (void)0);
3434
3435 if (src == SLJIT_IMM) {
3436 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3437 FAIL_IF_NULL(jump);
3438 set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
3439 jump->u.target = (sljit_uw)srcw;
3440
3441 jump->addr = compiler->size;
3442 /* Worst case size. */
3443 compiler->size += JUMP_MAX_SIZE;
3444 inst = (sljit_u8*)ensure_buf(compiler, 1);
3445 FAIL_IF_NULL(inst);
3446
3447 inst[0] = SLJIT_INST_JUMP;
3448 } else {
3449#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3450 /* REX_W is not necessary (src is not immediate). */
3451 compiler->mode32 = 1;
3452#endif
3453 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
3454 FAIL_IF(!inst);
3455 inst[0] = GROUP_FF;
3456 inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
3457 }
3458 return SLJIT_SUCCESS;
3459}
3460
3461SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3462 sljit_s32 dst, sljit_sw dstw,
3463 sljit_s32 type)
3464{
3465 sljit_u8 *inst;
3466 sljit_u8 cond_set;
3467#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3468 sljit_s32 reg;
3469#endif /* SLJIT_CONFIG_X86_64 */
3470 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
3471 sljit_s32 dst_save = dst;
3472 sljit_sw dstw_save = dstw;
3473
3474 CHECK_ERROR();
3475 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3476
3477 ADJUST_LOCAL_OFFSET(dst, dstw);
3478 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3479
3480 /* setcc = jcc + 0x10. */
3481 cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
3482
3483#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3484 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
3485 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
3486 FAIL_IF(!inst);
3487 INC_SIZE(4 + 3);
3488 /* Set low register to conditional flag. */
3489 inst[0] = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
3490 inst[1] = GROUP_0F;
3491 inst[2] = cond_set;
3492 inst[3] = MOD_REG | reg_lmap[TMP_REG1];
3493 inst[4] = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
3494 inst[5] = OR_rm8_r8;
3495 inst[6] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
3496 return SLJIT_SUCCESS;
3497 }
3498
3499 reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
3500
3501 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
3502 FAIL_IF(!inst);
3503 INC_SIZE(4 + 4);
3504 /* Set low register to conditional flag. */
3505 inst[0] = (reg_map[reg] <= 7) ? REX : REX_B;
3506 inst[1] = GROUP_0F;
3507 inst[2] = cond_set;
3508 inst[3] = MOD_REG | reg_lmap[reg];
3509 inst[4] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
3510 /* The movzx instruction does not affect flags. */
3511 inst[5] = GROUP_0F;
3512 inst[6] = MOVZX_r_rm8;
3513 inst[7] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
3514
3515 if (reg != TMP_REG1)
3516 return SLJIT_SUCCESS;
3517
3518 if (GET_OPCODE(op) < SLJIT_ADD) {
3519 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
3520 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3521 }
3522
3523 SLJIT_SKIP_CHECKS(compiler);
3524 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3525
3526#else /* !SLJIT_CONFIG_X86_64 */
3527 SLJIT_ASSERT(reg_map[TMP_REG1] < 4);
3528
3529 /* The SLJIT_CONFIG_X86_32 code path starts here. */
3530 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3531 /* Low byte is accessible. */
3532 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3533 FAIL_IF(!inst);
3534 INC_SIZE(3 + 3);
3535 /* Set low byte to conditional flag. */
3536 inst[0] = GROUP_0F;
3537 inst[1] = cond_set;
3538 inst[2] = U8(MOD_REG | reg_map[dst]);
3539
3540 inst[3] = GROUP_0F;
3541 inst[4] = MOVZX_r_rm8;
3542 inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
3543 return SLJIT_SUCCESS;
3544 }
3545
3546 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3547 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);
3548 FAIL_IF(!inst);
3549 INC_SIZE(3 + 2);
3550
3551 /* Set low byte to conditional flag. */
3552 inst[0] = GROUP_0F;
3553 inst[1] = cond_set;
3554 inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3555
3556 inst[3] = OR_rm8_r8;
3557 inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);
3558 return SLJIT_SUCCESS;
3559 }
3560
3561 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3562 FAIL_IF(!inst);
3563 INC_SIZE(3 + 3);
3564 /* Set low byte to conditional flag. */
3565 inst[0] = GROUP_0F;
3566 inst[1] = cond_set;
3567 inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3568
3569 inst[3] = GROUP_0F;
3570 inst[4] = MOVZX_r_rm8;
3571 inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);
3572
3573 if (GET_OPCODE(op) < SLJIT_ADD)
3574 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3575
3576 SLJIT_SKIP_CHECKS(compiler);
3577 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3578#endif /* SLJIT_CONFIG_X86_64 */
3579}
3580
3581SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3582 sljit_s32 dst_freg,
3583 sljit_s32 src1, sljit_sw src1w,
3584 sljit_s32 src2_freg)
3585{
3586 sljit_u8* inst;
3587 sljit_uw size;
3588
3589 CHECK_ERROR();
3590 CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3591
3592 ADJUST_LOCAL_OFFSET(src1, src1w);
3593
3594#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3595 compiler->mode32 = 1;
3596#endif /* SLJIT_CONFIG_X86_64 */
3597
3598 if (dst_freg != src2_freg) {
3599 if (dst_freg == src1) {
3600 src1 = src2_freg;
3601 src1w = 0;
3602 type ^= 0x1;
3603 } else
3604 FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));
3605 }
3606
3607 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
3608 FAIL_IF(!inst);
3609 INC_SIZE(2);
3610 inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);
3611
3612 size = compiler->size;
3613 FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));
3614
3615 inst[1] = U8(compiler->size - size);
3616 return SLJIT_SUCCESS;
3617}
3618
3619SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3620 sljit_s32 freg,
3621 sljit_s32 srcdst, sljit_sw srcdstw)
3622{
3623 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3624 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3625 sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3626 sljit_uw op;
3627
3628 CHECK_ERROR();
3629 CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3630
3631 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3632
3633#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3634 compiler->mode32 = 1;
3635#endif /* SLJIT_CONFIG_X86_64 */
3636
3637 switch (reg_size) {
3638 case 4:
3639 op = EX86_SSE2;
3640 break;
3641 case 5:
3642 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3643 return SLJIT_ERR_UNSUPPORTED;
3644 op = EX86_SSE2 | VEX_256;
3645 break;
3646 default:
3647 return SLJIT_ERR_UNSUPPORTED;
3648 }
3649
3650 if (!(srcdst & SLJIT_MEM))
3651 alignment = reg_size;
3652
3653 if (type & SLJIT_SIMD_FLOAT) {
3654 if (elem_size == 2 || elem_size == 3) {
3655 op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;
3656
3657 if (elem_size == 3)
3658 op |= EX86_PREF_66;
3659
3660 if (type & SLJIT_SIMD_STORE)
3661 op += 1;
3662 } else
3663 return SLJIT_ERR_UNSUPPORTED;
3664 } else {
3665 op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)
3666 | (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3);
3667 }
3668
3669 if (type & SLJIT_SIMD_TEST)
3670 return SLJIT_SUCCESS;
3671
3672 if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX)))
3673 return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw);
3674
3675 return emit_groupf(compiler, op, freg, srcdst, srcdstw);
3676}
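
A usage sketch (assumes an initialized compiler; the flag combination shown is one plausible request): loading a 128-bit vector of single-precision floats from memory into a vector register; the element-size and alignment bits select between the MOVUPS/MOVAPS and MOVDQU/MOVDQA encodings above.

sljit_emit_simd_mov(compiler, SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32,
    SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 0);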
3677
3678SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3679 sljit_s32 freg,
3680 sljit_s32 src, sljit_sw srcw)
3681{
3682 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3683 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3684 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
3685 sljit_u8 *inst;
3686 sljit_u8 opcode = 0;
3687 sljit_uw op;
3688
3689 CHECK_ERROR();
3690 CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3691
3692 ADJUST_LOCAL_OFFSET(src, srcw);
3693
3694 if (!(type & SLJIT_SIMD_FLOAT)) {
3695 CHECK_EXTRA_REGS(src, srcw, (void)0);
3696 }
3697
3698#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3699 if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
3700 return SLJIT_ERR_UNSUPPORTED;
3701#else /* !SLJIT_CONFIG_X86_32 */
3702 compiler->mode32 = 1;
3703
3704 if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3705 return SLJIT_ERR_UNSUPPORTED;
3706#endif /* SLJIT_CONFIG_X86_32 */
3707
3708 if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)))
3709 return SLJIT_ERR_UNSUPPORTED;
3710
3711 if (type & SLJIT_SIMD_TEST)
3712 return SLJIT_SUCCESS;
3713
3714 if (reg_size == 5)
3715 use_vex = 1;
3716
3717 if (use_vex && src != SLJIT_IMM) {
3718 op = 0;
3719
3720 switch (elem_size) {
3721 case 0:
3722 if (cpu_feature_list & CPU_FEATURE_AVX2)
3723 op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3724 break;
3725 case 1:
3726 if (cpu_feature_list & CPU_FEATURE_AVX2)
3727 op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3728 break;
3729 case 2:
3730 if (type & SLJIT_SIMD_FLOAT) {
3731 if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM)))
3732 op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3733 } else if (cpu_feature_list & CPU_FEATURE_AVX2)
3734 op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3735 break;
3736 default:
3737#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3738 if (!(type & SLJIT_SIMD_FLOAT)) {
3739 if (cpu_feature_list & CPU_FEATURE_AVX2)
3740 op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3741 break;
3742 }
3743#endif /* SLJIT_CONFIG_X86_64 */
3744
3745 if (reg_size == 5)
3746 op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3747 break;
3748 }
3749
3750 if (op != 0) {
3751 if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
3752#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3753 if (elem_size >= 3)
3754 compiler->mode32 = 0;
3755#endif /* SLJIT_CONFIG_X86_64 */
3756 FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw));
3757#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3758 compiler->mode32 = 1;
3759#endif /* SLJIT_CONFIG_X86_64 */
3760 src = freg;
3761 srcw = 0;
3762 }
3763
3764 if (reg_size == 5)
3765 op |= VEX_256;
3766
3767 return emit_vex_instruction(compiler, op, freg, 0, src, srcw);
3768 }
3769 }
3770
3771 if (type & SLJIT_SIMD_FLOAT) {
3772 if (src == SLJIT_IMM) {
3773 if (use_vex)
3774 return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
3775
3776 return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0);
3777 }
3778
3779 SLJIT_ASSERT(reg_size == 4);
3780
3781 if (use_vex) {
3782 if (elem_size == 3)
3783 return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, srcw);
3784
3785 SLJIT_ASSERT(!(src & SLJIT_MEM));
3786 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
3787 return emit_byte(compiler, 0);
3788 }
3789
3790 if (elem_size == 2 && freg != src) {
3791 FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw));
3792 src = freg;
3793 srcw = 0;
3794 }
3795
3796 op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2;
3797 FAIL_IF(emit_groupf(compiler, op, freg, src, srcw));
3798
3799 if (elem_size == 2)
3800 return emit_byte(compiler, 0);
3801 return SLJIT_SUCCESS;
3802 }
3803
3804 if (src == SLJIT_IMM) {
3805 if (elem_size == 0) {
3806 srcw = (sljit_u8)srcw;
3807 srcw |= srcw << 8;
3808 srcw |= srcw << 16;
3809 elem_size = 2;
3810 } else if (elem_size == 1) {
3811 srcw = (sljit_u16)srcw;
3812 srcw |= srcw << 16;
3813 elem_size = 2;
3814 }
3815
3816#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3817 if (elem_size == 2 && (sljit_s32)srcw == -1)
3818 srcw = -1;
3819#endif /* SLJIT_CONFIG_X86_64 */
3820
3821 if (srcw == 0 || srcw == -1) {
3822 if (use_vex)
3823 return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
3824
3825 return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);
3826 }
3827
3828#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3829 if (elem_size == 3)
3830 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
3831 else
3832#endif /* SLJIT_CONFIG_X86_64 */
3833 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
3834
3835 src = TMP_REG1;
3836 srcw = 0;
3837
3838 }
3839
3840 op = 2;
3841 opcode = MOVD_x_rm;
3842
3843 switch (elem_size) {
3844 case 0:
3845 if (!FAST_IS_REG(src)) {
3846 opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
3847 op = 3;
3848 }
3849 break;
3850 case 1:
3851 if (!FAST_IS_REG(src))
3852 opcode = PINSRW_x_rm_i8;
3853 break;
3854 case 2:
3855 break;
3856#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3857 case 3:
3858 /* MOVQ */
3859 compiler->mode32 = 0;
3860 break;
3861#endif /* SLJIT_CONFIG_X86_64 */
3862 }
3863
3864 if (use_vex) {
3865 if (opcode != MOVD_x_rm) {
3866 op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode;
3867 FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, freg, src, srcw));
3868 } else
3869 FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw));
3870 } else {
3871 inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw);
3872 FAIL_IF(!inst);
3873 inst[0] = GROUP_0F;
3874 inst[1] = opcode;
3875
3876 if (op == 3) {
3877 SLJIT_ASSERT(opcode == 0x3a);
3878 inst[2] = PINSRB_x_rm_i8;
3879 }
3880 }
3881
3882 if (use_vex && elem_size >= 2) {
3883#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3884 op = VPBROADCASTD_x_xm;
3885#else /* !SLJIT_CONFIG_X86_32 */
3886 op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
3887#endif /* SLJIT_CONFIG_X86_32 */
3888 return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
3889 }
3890
3891 SLJIT_ASSERT(reg_size == 4);
3892
3893 if (opcode != MOVD_x_rm)
3894 FAIL_IF(emit_byte(compiler, 0));
3895
3896 switch (elem_size) {
3897 case 0:
3898 if (use_vex) {
3899 FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
3900 return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, TMP_FREG, 0);
3901 }
3902 FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
3903 return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
3904 case 1:
3905 if (use_vex)
3906 FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, freg, 0));
3907 else
3908 FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0));
3909 FAIL_IF(emit_byte(compiler, 0));
3910 /* fallthrough */
3911 default:
3912 if (use_vex)
3913 FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0));
3914 else
3915 FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
3916 return emit_byte(compiler, 0);
3917#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3918 case 3:
3919 compiler->mode32 = 1;
3920 if (use_vex)
3921 FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0));
3922 else
3923 FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
3924 return emit_byte(compiler, 0x44);
3925#endif /* SLJIT_CONFIG_X86_64 */
3926 }
3927}
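/* Annotation (added, not part of the original source): without AVX2 the
   broadcast is synthesized from SSE shuffles: 32-bit elements use MOVD plus
   PSHUFD with immediate 0, 16-bit elements go through PSHUFLW 0 and then
   PSHUFD 0, and 8-bit elements use PSHUFB with an all-zero mask (TMP_FREG
   cleared by PXOR), which replicates byte 0 into every lane. With AVX2 a
   single VPBROADCASTB/W/D/Q or VBROADCASTSS/SD is emitted instead. */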
3928
3929SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3930 sljit_s32 freg, sljit_s32 lane_index,
3931 sljit_s32 srcdst, sljit_sw srcdstw)
3932{
3933 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3934 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3935 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
3936 sljit_u8 *inst;
3937 sljit_u8 opcode = 0;
3938 sljit_uw op;
3939 sljit_s32 freg_orig = freg;
3940#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3941 sljit_s32 srcdst_is_ereg = 0;
3942 sljit_s32 srcdst_orig = 0;
3943 sljit_sw srcdstw_orig = 0;
3944#endif /* SLJIT_CONFIG_X86_32 */
3945
3946 CHECK_ERROR();
3947 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3948
3949 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3950
3951 if (reg_size == 5) {
3952 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3953 return SLJIT_ERR_UNSUPPORTED;
3954 use_vex = 1;
3955 } else if (reg_size != 4)
3956 return SLJIT_ERR_UNSUPPORTED;
3957
3958#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3959 if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
3960 return SLJIT_ERR_UNSUPPORTED;
3961#else /* SLJIT_CONFIG_X86_32 */
3962 if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3963 return SLJIT_ERR_UNSUPPORTED;
3964#endif /* SLJIT_CONFIG_X86_32 */
3965
3966 if (type & SLJIT_SIMD_TEST)
3967 return SLJIT_SUCCESS;
3968
3969#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3970 compiler->mode32 = 1;
3971#else /* !SLJIT_CONFIG_X86_64 */
3972 if (!(type & SLJIT_SIMD_FLOAT)) {
3973 CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);
3974
3975 if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
3976 srcdst_orig = srcdst;
3977 srcdstw_orig = srcdstw;
3978 srcdst = TMP_REG1;
3979 srcdstw = 0;
3980 }
3981 }
3982#endif /* SLJIT_CONFIG_X86_64 */
3983
3984 if (type & SLJIT_SIMD_LANE_ZERO) {
3985 if (lane_index == 0) {
3986 if (!(type & SLJIT_SIMD_FLOAT)) {
3987#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3988 if (elem_size == 3) {
3989 compiler->mode32 = 0;
3990 elem_size = 2;
3991 }
3992#endif /* SLJIT_CONFIG_X86_64 */
3993 if (srcdst == SLJIT_IMM) {
3994 if (elem_size == 0)
3995 srcdstw = (sljit_u8)srcdstw;
3996 else if (elem_size == 1)
3997 srcdstw = (sljit_u16)srcdstw;
3998
3999 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4000 srcdst = TMP_REG1;
4001 srcdstw = 0;
4002 elem_size = 2;
4003 }
4004
4005 if (elem_size == 2) {
4006 if (use_vex)
4007 return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
4008 return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw);
4009 }
4010 } else if (srcdst & SLJIT_MEM) {
4011 SLJIT_ASSERT(elem_size == 2 || elem_size == 3);
4012
4013 if (use_vex)
4014 return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw);
4015 return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw);
4016 } else if (elem_size == 3) {
4017 if (use_vex)
4018 return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0);
4019 return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0);
4020 } else if (use_vex) {
4021 FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
4022 return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, freg, TMP_FREG, srcdst, 0);
4023 }
4024 }
4025
4026 if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4027 freg = TMP_FREG;
4028 lane_index -= (1 << (4 - elem_size));
4029 } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
4030 if (use_vex)
4031 FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw));
4032 else
4033 FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));
4034 srcdst = TMP_FREG;
4035 srcdstw = 0;
4036 }
4037
4038 op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)
4039 | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;
4040
4041 if (use_vex)
4042 FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, freg, freg, freg, 0));
4043 else
4044 FAIL_IF(emit_groupf(compiler, op, freg, freg, 0));
4045 } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4046 FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
4047 FAIL_IF(emit_byte(compiler, 1));
4048
4049 freg = TMP_FREG;
4050 lane_index -= (1 << (4 - elem_size));
4051 }
4052
4053 if (type & SLJIT_SIMD_FLOAT) {
4054 if (elem_size == 3) {
4055 if (srcdst & SLJIT_MEM) {
4056 if (type & SLJIT_SIMD_STORE)
4057 op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
4058 else
4059 op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
4060
4061 /* VEX prefix clears upper bits of the target register. */
4062 if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || freg == TMP_FREG))
4063 FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2
4064 | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), freg, (type & SLJIT_SIMD_STORE) ? 0 : freg, srcdst, srcdstw));
4065 else
4066 FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw));
4067
4068 /* In case of store, freg is not TMP_FREG. */
4069 } else if (type & SLJIT_SIMD_STORE) {
4070 if (lane_index == 1) {
4071 if (use_vex)
4072 return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0);
4073 return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0);
4074 }
4075 if (use_vex)
4076 return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0);
4077 return emit_sse2_load(compiler, 0, srcdst, freg, 0);
4078 } else if (use_vex && (reg_size == 4 || freg == TMP_FREG)) {
4079 if (lane_index == 1)
4080 FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0));
4081 else
4082 FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0));
4083 } else {
4084 if (lane_index == 1)
4085 FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0));
4086 else
4087 FAIL_IF(emit_sse2_load(compiler, 0, freg, srcdst, 0));
4088 }
4089 } else if (type & SLJIT_SIMD_STORE) {
4090 if (lane_index == 0) {
4091 if (use_vex)
4092 return emit_vex_instruction(compiler, ((srcdst & SLJIT_MEM) ? MOVSD_xm_x : MOVSD_x_xm) | EX86_PREF_F3 | EX86_SSE2
4093 | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV), freg, ((srcdst & SLJIT_MEM) ? 0 : freg), srcdst, srcdstw);
4094 return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg);
4095 }
4096
4097 if (srcdst & SLJIT_MEM) {
4098 if (use_vex)
4099 FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, srcdst, srcdstw));
4100 else
4101 FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
4102 return emit_byte(compiler, U8(lane_index));
4103 }
4104
4105 if (use_vex) {
4106 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0));
4107 return emit_byte(compiler, U8(lane_index));
4108 }
4109
4110 if (srcdst == freg)
4111 op = SHUFPS_x_xm | EX86_SSE2;
4112 else {
4113 switch (lane_index) {
4114 case 1:
4115 op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;
4116 break;
4117 case 2:
4118 op = MOVHLPS_x_x | EX86_SSE2;
4119 break;
4120 default:
4121 SLJIT_ASSERT(lane_index == 3);
4122 op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;
4123 break;
4124 }
4125 }
4126
4127 FAIL_IF(emit_groupf(compiler, op, srcdst, freg, 0));
4128
4129 op &= 0xff;
4130 if (op == SHUFPS_x_xm || op == PSHUFD_x_xm)
4131 return emit_byte(compiler, U8(lane_index));
4132
4133 return SLJIT_SUCCESS;
4134 } else {
4135 if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
4136 FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
4137 FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
4138 } else
4139 FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst));
4140 }
4141
4142 if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE))
4143 return SLJIT_SUCCESS;
4144
4145 SLJIT_ASSERT(reg_size == 5);
4146
4147 if (type & SLJIT_SIMD_LANE_ZERO) {
4148 FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
4149 return emit_byte(compiler, 0x4e);
4150 }
4151
4152 FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
4153 return emit_byte(compiler, 1);
4154 }
4155
4156 if (srcdst == SLJIT_IMM) {
4157 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4158 srcdst = TMP_REG1;
4159 srcdstw = 0;
4160 }
4161
4162 op = 3;
4163
4164 switch (elem_size) {
4165 case 0:
4166 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
4167 break;
4168 case 1:
4169 if (!(type & SLJIT_SIMD_STORE)) {
4170 op = 2;
4171 opcode = PINSRW_x_rm_i8;
4172 } else
4173 opcode = PEXTRW_rm_x_i8;
4174 break;
4175 case 2:
4176 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4177 break;
4178#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4179 case 3:
4180 /* PINSRQ / PEXTRQ */
4181 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4182 compiler->mode32 = 0;
4183 break;
4184#endif /* SLJIT_CONFIG_X86_64 */
4185 }
4186
4187 if (use_vex && (type & SLJIT_SIMD_STORE)) {
4188 op = opcode | ((op == 3) ? VEX_OP_0F3A : 0);
4189 FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, 0, srcdst, srcdstw));
4190 } else {
4191 inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
4192 FAIL_IF(!inst);
4193 inst[0] = GROUP_0F;
4194
4195 if (op == 3) {
4196 inst[1] = 0x3a;
4197 inst[2] = opcode;
4198 } else
4199 inst[1] = opcode;
4200 }
4201
4202 FAIL_IF(emit_byte(compiler, U8(lane_index)));
4203
4204 if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
4205 if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
4206 SLJIT_ASSERT(reg_size == 5);
4207
4208 if (type & SLJIT_SIMD_LANE_ZERO) {
4209 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
4210 return emit_byte(compiler, 0x4e);
4211 }
4212
4213 FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
4214 return emit_byte(compiler, 1);
4215 }
4216
4217#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4218 if (srcdst_orig & SLJIT_MEM)
4219 return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4220#endif /* SLJIT_CONFIG_X86_32 */
4221 return SLJIT_SUCCESS;
4222 }
4223
4224#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4225 if (elem_size >= 3)
4226 return SLJIT_SUCCESS;
4227
4228 compiler->mode32 = (type & SLJIT_32);
4229
4230 op = 2;
4231
4232 if (elem_size == 0)
4233 op |= EX86_REX;
4234
4235 if (elem_size == 2) {
4236 if (type & SLJIT_32)
4237 return SLJIT_SUCCESS;
4238
4239 SLJIT_ASSERT(!(compiler->mode32));
4240 op = 1;
4241 }
4242
4243 inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0);
4244 FAIL_IF(!inst);
4245
4246 if (op != 1) {
4247 inst[0] = GROUP_0F;
4248 inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
4249 } else
4250 inst[0] = MOVSXD_r_rm;
4251#else /* !SLJIT_CONFIG_X86_64 */
4252 if (elem_size >= 2)
4253 return SLJIT_SUCCESS;
4254
4255 FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
4256 (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));
4257
4258 if (srcdst_orig & SLJIT_MEM)
4259 return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4260#endif /* SLJIT_CONFIG_X86_64 */
4261 return SLJIT_SUCCESS;
4262}
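/* Annotation (added, not part of the original source): lane accesses map to
   PINSRB/W/D and PEXTRB/W/D, with REX.W (mode32 == 0) selecting the
   PINSRQ/PEXTRQ forms on x86-64. For 256-bit registers the half containing
   the lane is first moved into TMP_FREG with VEXTRACTF128/VEXTRACTI128,
   modified there, and merged back with VINSERTF128/VINSERTI128; the
   VPERMQ/VPERMPD immediate 0x4e (01 00 11 10 binary) swaps the two 128-bit
   halves in the SLJIT_SIMD_LANE_ZERO paths. */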
4263
4264SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4265 sljit_s32 freg,
4266 sljit_s32 src, sljit_s32 src_lane_index)
4267{
4268 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4269 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4270 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4271 sljit_uw pref;
4272 sljit_u8 byte;
4273#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4274 sljit_s32 opcode3 = TMP_REG1;
4275#else /* !SLJIT_CONFIG_X86_32 */
4276 sljit_s32 opcode3 = SLJIT_S0;
4277#endif /* SLJIT_CONFIG_X86_32 */
4278
4279 CHECK_ERROR();
4280 CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
4281
4282#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4283 compiler->mode32 = 1;
4284#endif /* SLJIT_CONFIG_X86_64 */
4285 SLJIT_ASSERT(reg_map[opcode3] == 3);
4286
4287 if (reg_size == 5) {
4288 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4289 return SLJIT_ERR_UNSUPPORTED;
4290 use_vex = 1;
4291 } else if (reg_size != 4)
4292 return SLJIT_ERR_UNSUPPORTED;
4293
4294 if (type & SLJIT_SIMD_FLOAT) {
4295 pref = 0;
4296 byte = U8(src_lane_index);
4297
4298 if (elem_size == 3) {
4299 if (type & SLJIT_SIMD_TEST)
4300 return SLJIT_SUCCESS;
4301
4302 if (reg_size == 5) {
4303 if (src_lane_index == 0)
4304 return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4305
4306 FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4307
4308 byte = U8(byte | (byte << 2));
4309 return emit_byte(compiler, U8(byte | (byte << 4)));
4310 }
4311
4312 if (src_lane_index == 0) {
4313 if (use_vex)
4314 return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, 0);
4315 return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0);
4316 }
4317
4318 /* Changes it to SHUFPD_x_xm. */
4319 pref = EX86_PREF_66;
4320 } else if (elem_size != 2)
4321 return SLJIT_ERR_UNSUPPORTED;
4322 else if (type & SLJIT_SIMD_TEST)
4323 return SLJIT_SUCCESS;
4324
4325 if (reg_size == 5) {
4326 SLJIT_ASSERT(elem_size == 2);
4327
4328 if (src_lane_index == 0)
4329 return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4330
4331 FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4332
4333 byte = 0x44;
4334 if (src_lane_index >= 4) {
4335 byte = 0xee;
4336 src_lane_index -= 4;
4337 }
4338
4339 FAIL_IF(emit_byte(compiler, byte));
4340 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0));
4341 byte = U8(src_lane_index);
4342 } else if (use_vex) {
4343 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
4344 } else {
4345 if (freg != src)
4346 FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, freg, src, 0));
4347
4348 FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, freg, freg, 0));
4349 }
4350
4351 if (elem_size == 2) {
4352 byte = U8(byte | (byte << 2));
4353 byte = U8(byte | (byte << 4));
4354 } else
4355 byte = U8(byte | (byte << 1));
4356
4357 return emit_byte(compiler, U8(byte));
4358 }
4359
4360 if (type & SLJIT_SIMD_TEST)
4361 return SLJIT_SUCCESS;
4362
4363 if (elem_size == 0) {
4364 if (reg_size == 5 && src_lane_index >= 16) {
4365 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4366 FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
4367 src_lane_index &= 0x7;
4368 src = freg;
4369 }
4370
4371 if (src_lane_index != 0 || (freg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) {
4372 pref = 0;
4373
4374 if ((src_lane_index & 0x3) == 0) {
4375 pref = EX86_PREF_66;
4376 byte = U8(src_lane_index >> 2);
4377 } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
4378 pref = EX86_PREF_F2;
4379 byte = U8(src_lane_index >> 1);
4380 } else {
4381 if (!use_vex) {
4382 if (freg != src)
4383 FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
4384
4385 FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0));
4386 } else
4387 FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0));
4388
4389 FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
4390 }
4391
4392 if (pref != 0) {
4393 if (use_vex)
4394 FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0));
4395 else
4396 FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
4397 FAIL_IF(emit_byte(compiler, byte));
4398 }
4399
4400 src = freg;
4401 }
4402
4403 if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2))
4404 return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4405
4406 SLJIT_ASSERT(reg_size == 4);
4407 FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
4408 return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
4409 }
4410
4411 if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) {
4412 switch (elem_size) {
4413 case 1:
4414 pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4415 break;
4416 case 2:
4417 pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4418 break;
4419 default:
4420 pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4421 break;
4422 }
4423
4424 if (reg_size == 5)
4425 pref |= VEX_256;
4426
4427 return emit_vex_instruction(compiler, pref, freg, 0, src, 0);
4428 }
4429
4430 if (reg_size == 5) {
4431 switch (elem_size) {
4432 case 1:
4433 byte = U8(src_lane_index & 0x3);
4434 src_lane_index >>= 2;
4435 pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
4436 break;
4437 case 2:
4438 byte = U8(src_lane_index & 0x3);
4439 src_lane_index >>= 1;
4440 pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
4441 break;
4442 case 3:
4443 pref = 0;
4444 break;
4445 default:
4446 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4447 return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
4448 }
4449
4450 if (pref != 0) {
4451 FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0));
4452 byte = U8(byte | (byte << 2));
4453 FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4454
4455 if (src_lane_index == 0)
4456 return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
4457
4458 src = freg;
4459 }
4460
4461 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4462 byte = U8(src_lane_index);
4463 byte = U8(byte | (byte << 2));
4464 return emit_byte(compiler, U8(byte | (byte << 4)));
4465 }
4466
4467 switch (elem_size) {
4468 case 1:
4469 byte = U8(src_lane_index & 0x3);
4470 src_lane_index >>= 1;
4471 pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;
4472
4473 if (use_vex)
4474 FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0));
4475 else
4476 FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
4477 byte = U8(byte | (byte << 2));
4478 FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4479
4480 if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2)
4481 return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
4482
4483 src = freg;
4484 /* fallthrough */
4485 case 2:
4486 byte = U8(src_lane_index);
4487 byte = U8(byte | (byte << 2));
4488 break;
4489 default:
4490 byte = U8(src_lane_index << 1);
4491 byte = U8(byte | (byte << 2) | 0x4);
4492 break;
4493 }
4494
4495 if (use_vex)
4496 FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, src, 0));
4497 else
4498 FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
4499 return emit_byte(compiler, U8(byte | (byte << 4)));
4500}
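/* Annotation (added, not part of the original source): the shuffle byte
   arithmetic above builds PSHUFD/PSHUFLW immediates, where each 2-bit field
   selects one source lane. "byte | (byte << 2)" followed by
   "byte | (byte << 4)" therefore replicates a 2-bit lane index into all four
   fields: lane 0 gives 0x00, lane 1 gives 0x55, lane 2 gives 0xaa and
   lane 3 gives 0xff. */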
4501
4502SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4503 sljit_s32 freg,
4504 sljit_s32 src, sljit_sw srcw)
4505{
4506 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4507 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4508 sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4509 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4510 sljit_u8 opcode;
4511
4512 CHECK_ERROR();
4513 CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4514
4515 ADJUST_LOCAL_OFFSET(src, srcw);
4516
4517#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4518 compiler->mode32 = 1;
4519#endif /* SLJIT_CONFIG_X86_64 */
4520
4521 if (reg_size == 5) {
4522 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4523 return SLJIT_ERR_UNSUPPORTED;
4524 use_vex = 1;
4525 } else if (reg_size != 4)
4526 return SLJIT_ERR_UNSUPPORTED;
4527
4528 if (type & SLJIT_SIMD_FLOAT) {
4529 if (elem_size != 2 || elem2_size != 3)
4530 return SLJIT_ERR_UNSUPPORTED;
4531
4532 if (type & SLJIT_SIMD_TEST)
4533 return SLJIT_SUCCESS;
4534
4535 if (use_vex)
4536 return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, freg, 0, src, srcw);
4537 return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw);
4538 }
4539
4540 switch (elem_size) {
4541 case 0:
4542 if (elem2_size == 1)
4543 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
4544 else if (elem2_size == 2)
4545 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
4546 else if (elem2_size == 3)
4547 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
4548 else
4549 return SLJIT_ERR_UNSUPPORTED;
4550 break;
4551 case 1:
4552 if (elem2_size == 2)
4553 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
4554 else if (elem2_size == 3)
4555 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
4556 else
4557 return SLJIT_ERR_UNSUPPORTED;
4558 break;
4559 case 2:
4560 if (elem2_size == 3)
4561 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
4562 else
4563 return SLJIT_ERR_UNSUPPORTED;
4564 break;
4565 default:
4566 return SLJIT_ERR_UNSUPPORTED;
4567 }
4568
4569 if (type & SLJIT_SIMD_TEST)
4570 return SLJIT_SUCCESS;
4571
4572 if (use_vex)
4573 return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw);
4574 return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw);
4575}
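/* Annotation (added, not part of the original source): integer widenings map
   one-to-one onto the SSE4.1 PMOVSX/PMOVZX family, chosen by
   SLJIT_SIMD_EXTEND_SIGNED (for example elem_size 0 to elem2_size 1 is
   PMOVSXBW or PMOVZXBW), all in the 66 0F 38 opcode map. The only float
   extension supported is CVTPS2PD, converting the low f32 lanes to f64. */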
4576
4577SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4578 sljit_s32 freg,
4579 sljit_s32 dst, sljit_sw dstw)
4580{
4581 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4582 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4583 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4584 sljit_s32 dst_r;
4585 sljit_uw op;
4586 sljit_u8 *inst;
4587
4588 CHECK_ERROR();
4589 CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4590
4591 ADJUST_LOCAL_OFFSET(dst, dstw);
4592
4593 CHECK_EXTRA_REGS(dst, dstw, (void)0);
4594#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4595 compiler->mode32 = 1;
4596#endif /* SLJIT_CONFIG_X86_64 */
4597
4598 if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
4599 return SLJIT_ERR_UNSUPPORTED;
4600
4601 if (reg_size == 4) {
4602 if (type & SLJIT_SIMD_TEST)
4603 return SLJIT_SUCCESS;
4604
4605 op = EX86_PREF_66 | EX86_SSE2_OP2;
4606
4607 switch (elem_size) {
4608 case 1:
4609 if (use_vex)
4610 FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, freg, 0));
4611 else
4612 FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0));
4613 freg = TMP_FREG;
4614 break;
4615 case 2:
4616 op = EX86_SSE2_OP2;
4617 break;
4618 }
4619
4620 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4621 op |= (elem_size < 2) ? PMOVMSKB_r_x : MOVMSKPS_r_x;
4622
4623 if (use_vex)
4624 FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0));
4625 else
4626 FAIL_IF(emit_groupf(compiler, op, dst_r, freg, 0));
4627
4628#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4629 compiler->mode32 = type & SLJIT_32;
4630#endif /* SLJIT_CONFIG_X86_64 */
4631
4632 if (elem_size == 1) {
4633 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
4634 FAIL_IF(!inst);
4635 inst[1] |= SHR;
4636 }
4637
4638 if (dst_r == TMP_REG1)
4639 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4640
4641 return SLJIT_SUCCESS;
4642 }
4643
4644 if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
4645 return SLJIT_ERR_UNSUPPORTED;
4646
4647 if (type & SLJIT_SIMD_TEST)
4648 return SLJIT_SUCCESS;
4649
4650 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4651
4652 if (elem_size == 1) {
4653 FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
4654 FAIL_IF(emit_byte(compiler, 1));
4655 FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0));
4656 FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
4657 } else {
4658 op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;
4659
4660 if (elem_size == 0)
4661 op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
4662 else if (elem_size == 3)
4663 op |= EX86_PREF_66;
4664
4665 FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0));
4666 }
4667
4668 if (dst_r == TMP_REG1) {
4669#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4670 compiler->mode32 = type & SLJIT_32;
4671#endif /* SLJIT_CONFIG_X86_64 */
4672 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4673 }
4674
4675 return SLJIT_SUCCESS;
4676}
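/* Annotation (added, not part of the original source): the sign-bit mask is
   collected with PMOVMSKB for 8-bit lanes and MOVMSKPS/MOVMSKPD (the 66
   prefix selecting the f64 form) for 32/64-bit lanes. 16-bit lanes are first
   narrowed with PACKSSWB, whose signed saturation preserves each word's sign
   bit; the wanted bits land in the upper byte of the PMOVMSKB value and the
   final "shr dst, 8" shifts them into place. */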
4677
4678static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
4679 sljit_s32 dst_freg, sljit_s32 src_freg)
4680{
4681 sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2;
4682
4683 SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);
4684
4685 if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3)
4686 op |= EX86_PREF_66;
4687
4688 return emit_groupf(compiler, op, dst_freg, src_freg, 0);
4689}
4690
4691SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4692 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4693{
4694 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4695 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4696 sljit_uw op = 0;
4697
4698 CHECK_ERROR();
4699 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4700
4701#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4702 compiler->mode32 = 1;
4703#endif /* SLJIT_CONFIG_X86_64 */
4704
4705 if (reg_size == 5) {
4706 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4707 return SLJIT_ERR_UNSUPPORTED;
4708 } else if (reg_size != 4)
4709 return SLJIT_ERR_UNSUPPORTED;
4710
4711 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4712 return SLJIT_ERR_UNSUPPORTED;
4713
4714 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4715 case SLJIT_SIMD_OP2_AND:
4716 op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;
4717
4718 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4719 op |= EX86_PREF_66;
4720 break;
4721 case SLJIT_SIMD_OP2_OR:
4722 op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;
4723
4724 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4725 op |= EX86_PREF_66;
4726 break;
4727 case SLJIT_SIMD_OP2_XOR:
4728 op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;
4729
4730 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4731 op |= EX86_PREF_66;
4732 break;
4733 }
4734
4735 if (type & SLJIT_SIMD_TEST)
4736 return SLJIT_SUCCESS;
4737
4738 if (reg_size == 5 || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) {
4739 if (reg_size == 5)
4740 op |= VEX_256;
4741
4742 return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0);
4743 }
4744
4745 if (dst_freg != src1_freg) {
4746 if (dst_freg == src2_freg)
4747 src2_freg = src1_freg;
4748 else
4749 FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg));
4750 }
4751
4752 FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0));
4753 return SLJIT_SUCCESS;
4754}
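/* Annotation (added, not part of the original source): the VEX forms of
   PAND/POR/PXOR and ANDPD/ORPD/XORPD are non-destructive three-operand
   encodings, so dst, src1 and src2 are passed straight through. The SSE
   fallback is two-operand and destructive, hence the shuffling above: when
   dst aliases src2 the operands are swapped (all three ops are commutative),
   otherwise src1 is copied into dst first with emit_simd_mov(). */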
4755
4756SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4757 sljit_s32 dst_reg,
4758 sljit_s32 mem_reg)
4759{
4760 CHECK_ERROR();
4761 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4762
4763 SLJIT_SKIP_CHECKS(compiler);
4764 return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
4765}
4766
4767SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4768 sljit_s32 src_reg,
4769 sljit_s32 mem_reg,
4770 sljit_s32 temp_reg)
4771{
4772 sljit_uw pref;
4773 sljit_s32 free_reg = TMP_REG1;
4774#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4775 sljit_sw srcw = 0;
4776 sljit_sw tempw = 0;
4777#endif /* SLJIT_CONFIG_X86_32 */
4778
4779 CHECK_ERROR();
4780 CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4781 CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
4782 CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);
4783
4784 SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
4785 SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));
4786
4787 op = GET_OPCODE(op);
4788#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4789 if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
4790 /* Src is a virtual register or its low byte is not accessible. */
4791 SLJIT_ASSERT(src_reg != SLJIT_R1);
4792 free_reg = src_reg;
4793
4794 EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);
4795 src_reg = TMP_REG1;
4796
4797 if (mem_reg == src_reg)
4798 mem_reg = TMP_REG1;
4799 }
4800#endif /* SLJIT_CONFIG_X86_32 */
4801
4802 if (temp_reg != SLJIT_R0) {
4803#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4804 compiler->mode32 = 0;
4805
4806 EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0);
4807 EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0);
4808
4809 if (src_reg == SLJIT_R0)
4810 src_reg = free_reg;
4811 if (mem_reg == SLJIT_R0)
4812 mem_reg = free_reg;
4813#else /* !SLJIT_CONFIG_X86_64 */
4814 if (src_reg == TMP_REG1 && mem_reg == SLJIT_R0 && (free_reg & SLJIT_MEM)) {
4815 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
4816 EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
4817 EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
4818
4819 mem_reg = SLJIT_R1;
4820 free_reg = SLJIT_R1;
4821 } else {
4822 EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0);
4823 EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
4824
4825 if (src_reg == SLJIT_R0)
4826 src_reg = free_reg;
4827 if (mem_reg == SLJIT_R0)
4828 mem_reg = free_reg;
4829 }
4830#endif /* SLJIT_CONFIG_X86_64 */
4831 }
4832
4833#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4834 compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P;
4835#endif /* SLJIT_CONFIG_X86_64 */
4836
4837 /* Lock prefix. */
4838 FAIL_IF(emit_byte(compiler, GROUP_LOCK));
4839
4840 pref = 0;
4841 if (op == SLJIT_MOV_U16)
4842 pref = EX86_HALF_ARG | EX86_PREF_66;
4843#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4844 if (op == SLJIT_MOV_U8)
4845 pref = EX86_REX;
4846#endif /* SLJIT_CONFIG_X86_64 */
4847
4848 FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));
4849
4850 if (temp_reg != SLJIT_R0) {
4851#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4852 compiler->mode32 = 0;
4853 return emit_mov(compiler, SLJIT_R0, 0, TMP_REG1, 0);
4854#else /* !SLJIT_CONFIG_X86_64 */
4855 EMIT_MOV(compiler, SLJIT_R0, 0, free_reg, 0);
4856 if (free_reg != TMP_REG1)
4857 return emit_mov(compiler, free_reg, 0, (free_reg == SLJIT_R1) ? SLJIT_MEM1(SLJIT_SP) : TMP_REG1, 0);
4858#endif /* SLJIT_CONFIG_X86_64 */
4859 }
4860 return SLJIT_SUCCESS;
4861}
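/* Annotation (added, not part of the original source): the atomic store is
   implemented with LOCK CMPXCHG, which compares EAX/RAX with [mem_reg], and
   on a match stores src_reg and sets ZF (tested as SLJIT_ATOMIC_STORED);
   otherwise it loads the current memory value into EAX/RAX. Because CMPXCHG
   hard-codes EAX/RAX as the expected value, the code above moves temp_reg
   into R0 first and restores the clobbered registers afterwards. A 32-bit
   instance would assemble as, e.g., "f0 0f b1 0e" = lock cmpxchg [esi], ecx. */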
4862
4863SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
4864{
4865 CHECK_ERROR();
4866 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
4867 ADJUST_LOCAL_OFFSET(dst, dstw);
4868
4869 CHECK_EXTRA_REGS(dst, dstw, (void)0);
4870
4871#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4872 compiler->mode32 = 0;
4873#endif
4874
4875 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
4876
4877#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4878 if (NOT_HALFWORD(offset)) {
4879 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
4880#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
4881 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
4882 return compiler->error;
4883#else
4884 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
4885#endif
4886 }
4887#endif
4888
4889 if (offset != 0)
4890 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
4891 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
4892}
4893
4894SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4895{
4896 sljit_u8 *inst;
4897 struct sljit_const *const_;
4898#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4899 sljit_s32 reg;
4900#endif
4901
4902 CHECK_ERROR_PTR();
4903 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4904 ADJUST_LOCAL_OFFSET(dst, dstw);
4905
4906 CHECK_EXTRA_REGS(dst, dstw, (void)0);
4907
4908 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
4909 PTR_FAIL_IF(!const_);
4910 set_const(const_, compiler);
4911
4912#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4913 compiler->mode32 = 0;
4914 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
4915
4916 if (emit_load_imm64(compiler, reg, init_value))
4917 return NULL;
4918#else
4919 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
4920 return NULL;
4921#endif
4922
4923 inst = (sljit_u8*)ensure_buf(compiler, 1);
4924 PTR_FAIL_IF(!inst);
4925
4926 inst[0] = SLJIT_INST_CONST;
4927
4928#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4929 if (dst & SLJIT_MEM)
4930 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
4931 return NULL;
4932#endif
4933
4934 return const_;
4935}
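/* Annotation (added, not part of the original source): the constant is
   emitted as a plain MOV immediate (the 10-byte MOV r64, imm64 form on
   x86-64) and its position is tagged with the SLJIT_INST_CONST marker in the
   instruction buffer, so sljit_set_const() can later find and overwrite the
   immediate inside the generated code. */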
4936
4937SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
4938{
4939 struct sljit_jump *jump;
4940 sljit_u8 *inst;
4941#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4942 sljit_s32 reg;
4943#endif /* SLJIT_CONFIG_X86_64 */
4944
4945 CHECK_ERROR_PTR();
4946 CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
4947 ADJUST_LOCAL_OFFSET(dst, dstw);
4948
4949 CHECK_EXTRA_REGS(dst, dstw, (void)0);
4950
4951 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
4952 PTR_FAIL_IF(!jump);
4953 set_mov_addr(jump, compiler, 0);
4954
4955#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4956 compiler->mode32 = 0;
4957 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
4958
4959 PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0));
4960 jump->addr = compiler->size;
4961
4962 if (reg_map[reg] >= 8)
4963 jump->flags |= MOV_ADDR_HI;
4964#else /* !SLJIT_CONFIG_X86_64 */
4965 PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));
4966#endif /* SLJIT_CONFIG_X86_64 */
4967
4968 inst = (sljit_u8*)ensure_buf(compiler, 1);
4969 PTR_FAIL_IF(!inst);
4970
4971 inst[0] = SLJIT_INST_MOV_ADDR;
4972
4973#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4974 if (dst & SLJIT_MEM)
4975 PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));
4976#endif /* SLJIT_CONFIG_X86_64 */
4977
4978 return jump;
4979}
4980
4981SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4982{
4983 SLJIT_UNUSED_ARG(executable_offset);
4984
4985 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
4986#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4987 sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
4988#else
4989 sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
4990#endif
4991 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
4992}
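/* Annotation (added, not part of the original source): on x86-32 the patched
   field is a rel32 displacement, so the stored value is the distance from
   the end of the 4-byte field (addr + 4, adjusted by executable_offset) to
   new_target; on x86-64 the MOV r64, imm64 form holds the absolute address.
   The SLJIT_UPDATE_WX_FLAGS() calls bracket the write so W^X allocators can
   temporarily make the page writable. */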
4993
4994SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
4995{
4996 SLJIT_UNUSED_ARG(executable_offset);
4997
4998 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
4999 sljit_unaligned_store_sw((void*)addr, new_constant);
5000 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);
5001}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 src, sljit_s32 src_lane_index)
#define IMUL_r_rm_i32