17# if defined(__FreeBSD__) || defined(__DragonFly__)
18# include <sys/sysctl.h>
29#include <capstone/capstone.h>
30#define HAVE_CAPSTONE_ITER
32#ifndef IR_DISASM_INTEL_SYNTAX
33# define IR_DISASM_INTEL_SYNTAX 0
55 if (
p->parent ==
NULL) {
57 }
else if (
p->parent->child[0] ==
p) {
58 p->parent->child[0] = r;
60 p->parent->child[1] = r;
74 if (
p->parent ==
NULL) {
76 }
else if (
p->parent->child[1] ==
p) {
77 p->parent->child[1] = l;
79 p->parent->child[0] = l;
108 if (node->
child[1]) {
109 node = node->
child[1];
111 node->
child[1] = sym;
115 }
else if (sym->
addr < node->
addr) {
116 if (node->
child[0]) {
117 node = node->
child[0];
119 node->
child[0] = sym;
135 while (sym && sym != _symbols && sym->
parent->
info == 1) {
138 if (node && node->
info == 1) {
146 ir_syms_rotateleft(sym);
154 if (node && node->
info == 1) {
162 ir_syms_rotateright(sym);
180 ir_disasm_destroy_symbols(
n->child[0]);
183 ir_disasm_destroy_symbols(
n->child[1]);
194 node = node->
child[0];
196 node = node->
child[1];
205static uint64_t ir_disasm_branch_target(csh cs,
const cs_insn *insn)
209#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
210 if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
211 for (i = 0; i < insn->detail->x86.op_count; i++) {
212 if (insn->detail->x86.operands[i].type == X86_OP_IMM) {
213 return insn->detail->x86.operands[i].imm;
217#elif defined(IR_TARGET_AARCH64)
218 if (cs_insn_group(cs, insn, ARM64_GRP_JUMP)
219 || insn->id == ARM64_INS_BL
220 || insn->id == ARM64_INS_ADR) {
221 for (i = 0; i < insn->detail->arm64.op_count; i++) {
222 if (insn->detail->arm64.operands[i].type == ARM64_OP_IMM)
223 return insn->detail->arm64.operands[i].imm;
231static uint64_t ir_disasm_rodata_reference(csh cs,
const cs_insn *insn)
233#if defined(IR_TARGET_X86)
236 for (i = 0; i < insn->detail->x86.op_count; i++) {
237 if (insn->detail->x86.operands[i].type == X86_OP_MEM
238 && insn->detail->x86.operands[i].mem.base == X86_REG_INVALID
239 && insn->detail->x86.operands[i].mem.segment == X86_REG_INVALID
240 && insn->detail->x86.operands[i].mem.index == X86_REG_INVALID
241 && insn->detail->x86.operands[i].mem.scale == 1) {
242 return (uint32_t)insn->detail->x86.operands[i].mem.disp;
245 if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
246 for (i = 0; i < insn->detail->x86.op_count; i++) {
247 if (insn->detail->x86.operands[i].type == X86_OP_MEM
248 && insn->detail->x86.operands[i].mem.disp) {
249 return (uint32_t)insn->detail->x86.operands[i].mem.disp;
253 if (insn->id == X86_INS_MOV
254 && insn->detail->x86.op_count == 2
255 && insn->detail->x86.operands[0].type == X86_OP_IMM
256 && insn->detail->x86.operands[0].size ==
sizeof(
void*)) {
257 return (uint32_t)insn->detail->x86.operands[0].imm;
259#elif defined(IR_TARGET_X64)
262 for (i = 0; i < insn->detail->x86.op_count; i++) {
263 if (insn->detail->x86.operands[i].type == X86_OP_MEM
264 && insn->detail->x86.operands[i].mem.base == X86_REG_RIP
265 && insn->detail->x86.operands[i].mem.segment == X86_REG_INVALID
267 && insn->detail->x86.operands[i].mem.index == X86_REG_INVALID
268 && insn->detail->x86.operands[i].mem.scale == 1) {
269 return insn->detail->x86.operands[i].mem.disp + insn->address + insn->size;
272#elif defined(IR_TARGET_AARCH64)
275 if (insn->id == ARM64_INS_ADR
276 || insn->id == ARM64_INS_LDRB
277 || insn->id == ARM64_INS_LDR
278 || insn->id == ARM64_INS_LDRH
279 || insn->id == ARM64_INS_LDRSB
280 || insn->id == ARM64_INS_LDRSH
281 || insn->id == ARM64_INS_LDRSW
282 || insn->id == ARM64_INS_STRB
283 || insn->id == ARM64_INS_STR
284 || insn->id == ARM64_INS_STRH) {
285 for (i = 0; i < insn->detail->arm64.op_count; i++) {
286 if (insn->detail->arm64.operands[i].type == ARM64_OP_IMM)
287 return insn->detail->arm64.operands[i].imm;
296static const char* ir_disasm_resolver(uint64_t
addr,
301 void *
a = (
void*)(uintptr_t)(
addr);
311 &&
info.dli_saddr ==
a) {
313 return info.dli_sname;
333 size_t orig_size =
size;
334 const void *orig_end = (
void *)((
char *)
start +
size);
341# ifdef HAVE_CAPSTONE_ITER
342 const uint8_t *cs_code;
351 uint32_t rodata_offset = 0;
352 uint32_t jmp_table_offset = 0;
357# if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
359 ret = cs_open(CS_ARCH_X86, CS_MODE_64, &cs);
360 if (
ret != CS_ERR_OK) {
361 fprintf(stderr,
"cs_open(CS_ARCH_X86, CS_MODE_64, ...) failed; [%d] %s\n",
ret, cs_strerror(
ret));
365 ret = cs_open(CS_ARCH_X86, CS_MODE_32, &cs);
366 if (
ret != CS_ERR_OK) {
367 fprintf(stderr,
"cs_open(CS_ARCH_X86, CS_MODE_32, ...) failed; [%d] %s\n",
ret, cs_strerror(
ret));
371 cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
372# if IR_DISASM_INTEL_SYNTAX
373 cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
375 cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
377# elif defined(IR_TARGET_AARCH64)
378 ret = cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &cs);
379 if (
ret != CS_ERR_OK) {
380 fprintf(stderr,
"cs_open(CS_ARCH_ARM64, CS_MODE_ARM, ...) failed; [%d] %s\n",
ret, cs_strerror(
ret));
383 cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
384 cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
404 if (
size > rodata_offset) {
405 size = rodata_offset;
409 if (jmp_table_offset) {
413 IR_ASSERT(orig_size - jmp_table_offset <= 0xffffffff);
414 n = (uint32_t)(orig_size - jmp_table_offset);
415 if (
size > jmp_table_offset) {
416 size = jmp_table_offset;
422 IR_ASSERT(
n > 0 &&
n %
sizeof(
void*) == 0 && jmp_table_offset %
sizeof(
void*) == 0);
423 p = (uintptr_t*)((
char*)
start + jmp_table_offset);
426 if ((uintptr_t)*
p >= (uintptr_t)
start && (uintptr_t)*
p < (uintptr_t)orig_end) {
437# ifdef HAVE_CAPSTONE_ITER
439 cs_size = (uint8_t*)
end - (uint8_t*)
start;
440 cs_addr = (uint64_t)(uintptr_t)cs_code;
441 insn = cs_malloc(cs);
442 while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
443 if ((
addr = ir_disasm_branch_target(cs, insn))
446 for (i = 0; i <
count; i++) {
447 if ((
addr = ir_disasm_branch_target(cs, &(insn[i])))
449 && (
addr >= (uint64_t)(uintptr_t)
start &&
addr < (uint64_t)(uintptr_t)
end)) {
451# ifdef HAVE_CAPSTONE_ITER
452 }
else if ((
addr = ir_disasm_rodata_reference(cs, insn))) {
454 }
else if ((
addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
456 if (
addr >= (uint64_t)(uintptr_t)
end &&
addr < (uint64_t)(uintptr_t)orig_end) {
476# ifdef HAVE_CAPSTONE_ITER
478 cs_size = (uint8_t*)
end - (uint8_t*)
start;
479 cs_addr = (uint64_t)(uintptr_t)cs_code;
480 while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
483 for (i = 0; i <
count; i++) {
488 fprintf(f,
".ENTRY_%d:\n", entry);
494# ifdef HAVE_CAPSTONE_ITER
496 fprintf(f,
" %" PRIx64
":", insn->address);
499#if defined(IR_TARGET_X64) && (CS_API_MAJOR < 5)
501 if (insn->id == X86_INS_MOVQ &&
strcmp(insn->mnemonic,
"movd") == 0) {
502 insn->mnemonic[3] =
'q';
506 fprintf(f,
"\t%s\n", insn->mnemonic);
509 fprintf(f,
"\t%s ", insn->mnemonic);
513 fprintf(f,
" %" PRIx64
":", insn[i].address);
517 fprintf(f,
"\t%s\n", insn[i].mnemonic);
520 fprintf(f,
"\t%s ", insn[i].mnemonic);
524#if defined(IR_TARGET_X64)
525# ifdef HAVE_CAPSTONE_ITER
526 if ((
addr = ir_disasm_rodata_reference(cs, insn))) {
528 if ((
addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
530 if (
addr >= (uint64_t)(uintptr_t)
end &&
addr < (uint64_t)(uintptr_t)orig_end) {
536 while (r >
p && ((*r >=
'0' && *r <=
'9') || (*r >=
'a' && *r <=
'f') || (*r >=
'A' && *r <=
'F'))) {
539 if (r >
p && *r ==
'x' && *(r - 1) ==
'0') {
546 fprintf(f,
".ENTRY_%d%s\n", entry, q);
548 fprintf(f,
".L%d%s\n", -entry, q);
553 }
else if ((sym = ir_disasm_resolver(
addr, &
offset))) {
557 while (r >
p && ((*r >=
'0' && *r <=
'9') || (*r >=
'a' && *r <=
'f') || (*r >=
'A' && *r <=
'F'))) {
560 if (r >
p && *r ==
'x' && *(r - 1) ==
'0') {
580#if defined(IR_TARGET_AARCH64)
589 if (*r >=
'0' && *r <=
'9') {
591 }
else if (*r >=
'A' && *r <=
'F') {
593 }
else if (*r >=
'a' && *r <=
'f') {
600 if (
p != q && *(q-1) ==
'-') {
604 if (
addr >= (uint64_t)(uintptr_t)
start &&
addr < (uint64_t)(uintptr_t)orig_end) {
609 fprintf(f,
".ENTRY_%d", entry);
616 }
else if ((sym = ir_disasm_resolver(
addr, &
offset))) {
617#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
618 if (
offset &&
p != q && *(q-1) ==
'$') {
644# ifdef HAVE_CAPSTONE_ITER
647 cs_free(insn,
count);
650 if (rodata_offset || jmp_table_offset) {
654 const unsigned char *
p = (
unsigned char*)
start + rodata_offset;
655 uint32_t
n = jmp_table_offset ?
656 (uint32_t)(jmp_table_offset - rodata_offset) :
657 (uint32_t)(orig_size - rodata_offset);
664 fprintf(f,
".ENTRY_%d:\n", entry);
669 fprintf(f,
"\t.db 0x%02x", (
int)*
p);
673 while (
n > 0 &&
j > 0) {
686 if (jmp_table_offset) {
687 uintptr_t *
p = (uintptr_t*)(
unsigned char*)
start + jmp_table_offset;
688 uint32_t
n = (uint32_t)(orig_size - jmp_table_offset);
690 fprintf(f,
".align %d\n", (
int)
sizeof(
void*));
692 p = (uintptr_t*)((
char*)
start + jmp_table_offset);
697 fprintf(f,
".ENTRY_%d:\n", entry);
703 if ((uintptr_t)*
p >= (uintptr_t)
start && (uintptr_t)*
p < (uintptr_t)orig_end) {
707 if (
sizeof(
void*) == 8) {
708 fprintf(f,
"\t.qword .ENTRY_%d\n", entry);
710 fprintf(f,
"\t.dword .ENTRY_%d\n", entry);
713 if (
sizeof(
void*) == 8) {
714 fprintf(f,
"\t.qword .L%d\n", -entry);
716 fprintf(f,
"\t.dword .L%d\n", -entry);
724 if (
sizeof(
void*) == 8) {
730 if (
sizeof(
void*) == 8) {
731 fprintf(f,
"\t.qword 0x%0llx\n", (
long long)*
p);
733 fprintf(f,
"\t.dword 0x%0x\n", (
int)*
p);
738 if (
sizeof(
void*) == 8) {
767 if (read(
fd,
s, sect->
size) != (ssize_t)sect->
size) {
775static void ir_elf_load_symbols(
void)
780#if defined(__linux__)
781 int fd = open(
"/proc/self/exe", O_RDONLY);
782#elif defined(__NetBSD__)
783 int fd = open(
"/proc/curproc/exe", O_RDONLY);
784#elif defined(__FreeBSD__) || defined(__DragonFly__)
786 size_t pathlen =
sizeof(path);
787 int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
788 if (sysctl(mib, 4, path, &pathlen,
NULL, 0) == -1) {
791 int fd = open(path, O_RDONLY);
793 int fd = open(
"/proc/self/path/a.out", O_RDONLY);
794#elif defined(__HAIKU__)
796 if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
797 NULL, path,
sizeof(path)) != B_OK) {
801 int fd = open(path, O_RDONLY);
809 if (read(
fd, &hdr,
sizeof(hdr)) ==
sizeof(hdr)
810 && hdr.
emagic[0] ==
'\177'
815 for (i = 0; i < hdr.
shnum; i++) {
816 if (read(
fd, §,
sizeof(sect)) ==
sizeof(sect)
824 && read(
fd, §,
sizeof(sect)) ==
sizeof(sect)
825 && (str_tbl = (
char*)ir_elf_read_sect(
fd, §)) !=
NULL) {
853 ir_elf_load_symbols();
861 ir_disasm_destroy_symbols(_symbols);
fprintf($stream, string $format, mixed ... $values)
fwrite($stream, string $data, ?int $length=null)
fputs($stream, string $data, ?int $length=null)
count(Countable|array $value, int $mode=COUNT_NORMAL)
strstr(string $haystack, string $needle, bool $before_needle=false)
void ir_hashtab_init(ir_hashtab *tab, uint32_t size)
void ir_hashtab_key_sort(ir_hashtab *tab)
void ir_hashtab_free(ir_hashtab *tab)
ir_ref ir_hashtab_find(const ir_hashtab *tab, uint32_t key)
bool ir_hashtab_add(ir_hashtab *tab, uint32_t key, ir_ref val)
struct _ir_hashtab ir_hashtab
void ir_disasm_free(void)
struct _ir_sym_node ir_sym_node
int ir_disasm(const char *name, const void *start, size_t size, bool asm_addr, ir_ctx *ctx, FILE *f)
void ir_disasm_add_symbol(const char *name, uint64_t addr, uint64_t size)
const char * ir_disasm_find_symbol(uint64_t addr, int64_t *offset)
#define ELFSYM_BIND(info)
struct _ir_elf_header ir_elf_header
#define ELFSYM_TYPE(info)
#define IR_ALIGNED_SIZE(size, alignment)
struct _ir_hashtab_bucket ir_hashtab_bucket
unsigned const char * end
uint32_t jmp_table_offset
struct _ir_sym_node * parent
struct _ir_sym_node * child[2]
strcmp(string $string1, string $string2)