#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "ir_aarch64.h"
#else
# error "Unknown IR target"
#endif
static int ir_assign_virtual_registers_slow(ir_ctx *ctx)
{
	uint32_t vregs_count = 0;

	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
			vregs[i] = ++vregs_count;
	uint32_t vregs_count = 0;

	if (!ctx->rules) {
		return ir_assign_virtual_registers_slow(ctx);
	}

	for (i = 1, insn = &ctx->ir_base[1]; i < ctx->insns_count; i++, insn++) {
		return ir_new_live_range(ctx, v, start, end);

	if (end >= p->start) {

			if (next->end > p->end) {

			p->next = next->next;

	} while (p && end >= p->start);

	return ir_add_live_range(ctx, v, start, end);
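/*
 * Editor's note -- a hedged sketch, not part of ir_ra.c: the fragments above
 * maintain each interval's live ranges as a singly-linked list of half-open
 * [start, end) ranges, sorted by start and merged when they touch or overlap
 * (the "end >= p->start" tests). A standalone illustration of that invariant,
 * with hypothetical names (range_t, range_add) and no malloc error handling:
 */
#include <stdlib.h>

typedef struct range {
	int start, end;              /* half-open [start, end) */
	struct range *next;          /* sorted by start, non-overlapping */
} range_t;

static range_t *range_add(range_t *head, int start, int end)
{
	range_t **pp = &head, *p;

	while ((p = *pp) && p->end < start) {
		pp = &p->next;                   /* skip ranges entirely before us */
	}
	if (!p || end < p->start) {
		range_t *n = malloc(sizeof(*n)); /* disjoint: link in a new node */
		n->start = start;
		n->end = end;
		n->next = p;
		*pp = n;
		return head;
	}
	if (start < p->start) {
		p->start = start;                /* extend the overlapping node left */
	}
	while (p->next && p->next->start <= end) {
		range_t *dead = p->next;         /* swallow later overlapping nodes */
		if (dead->end > end) {
			end = dead->end;
		}
		p->next = dead->next;
		free(dead);
	}
	if (end > p->end) {
		p->end = end;                    /* extend right */
	}
	return head;
}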
static bool ir_has_tmp(ir_ctx *ctx, ir_ref ref, int32_t op_num)

	while (p && p->start < old_start) {

	p->start = new_start;
	} while (p && p->pos < use_pos->pos);

	prev->next = use_pos;

	use_pos->hint = hint;
	use_pos->flags = use_flags;

	ir_add_use_pos(ctx, ival, use_pos);

	ir_add_use_pos(ctx, ival, use_pos);
		if (use_pos->pos == pos) {
			use_pos->hint = hint;

		use_pos = use_pos->next;
static void ir_hint_propagation(ir_ctx *ctx)

		if (use_pos->op_num == 0) {
			hint_use_pos = use_pos;

				ir_add_hint(ctx, hint_use_pos->hint_ref, hint_use_pos->pos, use_pos->hint);

			use_pos = use_pos->next;
#ifdef IR_BITSET_LIVENESS

	ir_ref *ops = ctx->ir_base[ref].ops;
	ref = ops[use_pos->op_num];

	ir_ref var = ir_binding_find(ctx, ref);

	bb->flags &= ~IR_BB_EMPTY;
	bb->flags |= IR_BB_OSR_ENTRY_LOADS;
	if (!ctx->osr_entry_loads) {
		list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
		ir_list_init(list, 16);
	}
	ir_list_push(list, b);
	ir_list_push(list, 0);

	ir_list_push(list, ref);
	uint32_t j, n, flags, def_flags;

				ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,

			for (; j <= n; j++, p++) {

					uint32_t v = ctx->vregs[child];

					ival = ir_add_live_range(ctx, v,

					ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);

					IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos)));
					stack[stack_pos++] = child;

		input = stack[--stack_pos];
	uint32_t b, i, j, k, n, succ, *p;

		live = bb_live + (len * b);

		for (p++, n--; n > 0; p++, n--) {
			for (ref = 0; ref < use_list->count; ref++) {

				if (insn->op == IR_PHI) {

						uint32_t v = ctx->vregs[input];

		if (insn->op == IR_END || insn->op == IR_LOOP_END) {

				if (insn->op != IR_VADDR) {
					insn->op3 = ctx->vars;

				ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
			if (insn->op != IR_PHI) {

				ir_reg reg = constraints.def_reg;

				if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {

					hint_ref = insn->op1;

				if (insn->op == IR_PARAM) {

				} else if (insn->op == IR_VLOAD) {

				ival = ir_fix_live_range(ctx, v,

				ival->type = insn->type;
				ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);

				ival->type = insn->type;
			for (; j <= insn->inputs_count; j++, p++) {

						ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);

					} else if (input == insn->op1) {

				} else if (ctx->rules) {

						ir_add_fusion_ranges(ctx, ref, input, bb, live);

						ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
			child_live_in = bb_live + (len * child);

				ir_add_live_range(ctx, i,

		live = bb_live + (len * b);
		ir_add_osr_entry_loads(ctx, bb, live, len, b);
#define IS_LIVE_IN_BLOCK(v, b) \
	(live_in_block[v] == b)
#define SET_LIVE_IN_BLOCK(v, b) do { \
		live_in_block[v] = b; \
	} while (0)
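/*
 * Editor's note -- an assumption about intent, not from the source: the two
 * macros above implement a constant-size "was v already seen in block b?"
 * check. Instead of one bitset per basic block, a single array slot per
 * virtual register remembers the last block that touched it; since IR numbers
 * blocks from 1, a zeroed array means "seen nowhere yet". A standalone sketch
 * with hypothetical names:
 */
#include <stdint.h>

static uint32_t seen_in_block[16];   /* one slot per vreg; 0 = not seen yet */

static int mark_live_in_block(uint32_t v, uint32_t b)
{
	if (seen_in_block[v] == b) {
		return 0;                    /* v was already processed in block b */
	}
	seen_in_block[v] = b;            /* remember b for subsequent queries */
	return 1;
}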
	if (live_lists->len >= live_lists->a.size) {
static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists)
{
	ir_list block_queue, fuse_queue;

			uint32_t v = ctx->vregs[i];

			uint32_t def_block = ctx->cfg_map[i];

				ir_ref n = insn->inputs_count - 1;

				for (; n > 0; p++, q++, n--) {

						uint32_t pred_block = ctx->cfg_map[*q];

						if (pred_block != def_block) {

					uint32_t use_block = ctx->cfg_map[use];

					if (def_block != use_block
					 && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {

					uint32_t use_block = ctx->cfg_map[use];

					if (def_block != use_block
					 && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {

	if (live_lists->len >= live_lists->a.size) {

		uint32_t pred_block = *p;

		if (pred_block != def_block) {
	ir_ref *ops = ctx->ir_base[ref].ops;
	ref = ops[use_pos->op_num];

	ir_ref var = ir_binding_find(ctx, ref);

	bb->flags &= ~IR_BB_EMPTY;
	bb->flags |= IR_BB_OSR_ENTRY_LOADS;
	if (!ctx->osr_entry_loads) {
		list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
		ir_list_init(list, 16);
	}
	ir_list_push(list, b);
	ir_list_push(list, 0);

	ir_list_push(list, ref);
	uint32_t j, n, flags, def_flags;

				ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,

			for (; j <= n; j++, p++) {

					uint32_t v = ctx->vregs[child];

					ival = ir_add_live_range(ctx, v,

					ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);

					IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos)));
					stack[stack_pos++] = child;

		input = stack[--stack_pos];
	uint32_t b, i, j, k, n, succ;
	uint32_t *live_outs;
	uint32_t *live_in_block;

	ir_compute_live_sets(ctx, live_outs, &live_lists);

				if (insn->op == IR_PHI) {

						uint32_t v = ctx->vregs[input];

		if (insn->op == IR_END || insn->op == IR_LOOP_END) {

				if (insn->op != IR_VADDR) {
					insn->op3 = ctx->vars;

				ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,

			if (insn->op != IR_PHI) {

				ir_reg reg = constraints.def_reg;

				if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {

					hint_ref = insn->op1;

				if (insn->op == IR_PARAM) {

				} else if (insn->op == IR_VLOAD) {

				ival = ir_fix_live_range(ctx, v,

				ival->type = insn->type;
				ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);

				ival->type = insn->type;

			for (; j <= insn->inputs_count; j++, p++) {

						ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);

					} else if (input == insn->op1) {

				} else if (ctx->rules) {

						ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b);

						ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);

				ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);

		ir_add_osr_entry_loads(ctx, bb, live_outs[ctx->cfg_blocks_count + 1 + i], &live_lists, b);
	return ir_ivals_overlap(&ival1->range, &ival2->range);

		while (parent && parent->end < child->start) {
			parent = parent->next;
		}
		if (!parent || parent->start > child->start || parent->end < child->end) {

		child = child->next;

static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child)

	if (child_ival->end >= parent_ival->end) {

	return ir_ivals_inside(&parent_ival->range, &child_ival->range);
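/*
 * Editor's sketch (hypothetical code, reusing range_t from the earlier
 * sketch): ir_ivals_inside above walks two sorted range lists in lockstep and
 * reports whether every range of `child` is fully covered by some range of
 * `parent` -- the test that makes it safe to coalesce the child vreg into the
 * parent without extending any lifetime.
 */
static int ranges_inside(const range_t *parent, const range_t *child)
{
	while (child) {
		while (parent && parent->end < child->start) {
			parent = parent->next;   /* skip parent ranges that end too early */
		}
		if (!parent || parent->start > child->start || parent->end < child->end) {
			return 0;                /* this child range is not fully covered */
		}
		child = child->next;
	}
	return 1;
}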
static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)

	fprintf(stderr, "COALESCE %d -> %d\n", r2, r1);

	ir_add_live_range(ctx, r1, live_range->start, live_range->end);
	live_range = live_range->next;
	while (live_range) {

		ir_add_live_range(ctx, r1, live_range->start, live_range->end);

		while (*prev && ((*prev)->pos < use_pos->pos ||
				((*prev)->pos == use_pos->pos &&
					(use_pos->op_num == 0 || (*prev)->op_num < use_pos->op_num)))) {
			if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) {
				(*prev)->hint_ref = 0;
			}
			prev = &(*prev)->next;
		}
		next_pos = use_pos->next;

		use_pos = use_pos->next;
static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from, ir_ref to)

		ir_vregs_join(ctx, v1, v2);
		ctx->vregs[to] = v1;

		ir_vregs_join(ctx, v2, v1);
		ctx->vregs[from] = v2;
	} else if (from < to) {
		ir_vregs_join(ctx, v1, v2);

			if (ctx->vregs[i] == v2) {

		ctx->vregs[to] = v1;

		ir_vregs_join(ctx, v2, v1);

			if (ctx->vregs[i] == v1) {

		ctx->vregs[from] = v2;

	fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to);
static int ir_block_cmp(const void *b1, const void *b2)

	if (d1->loop_depth > d2->loop_depth) {

	} else if (d1->loop_depth == d2->loop_depth) {
		if (d1->b < d2->b) {
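/*
 * Editor's note -- a hedged usage sketch with a hypothetical layout for
 * ir_coalesce_block: ir_block_cmp is a qsort(3) comparator that orders blocks
 * by loop depth, deepest first (so copies in inner loops get coalesced
 * first), and by block number within the same depth.
 */
#include <stdint.h>
#include <stdlib.h>

typedef struct {
	uint32_t b;                      /* block number (unique, never equal) */
	uint32_t loop_depth;
} coalesce_block_t;

static int coalesce_block_cmp(const void *b1, const void *b2)
{
	const coalesce_block_t *d1 = b1, *d2 = b2;

	if (d1->loop_depth > d2->loop_depth) {
		return -1;                   /* deeper loop sorts first */
	} else if (d1->loop_depth == d2->loop_depth) {
		return (d1->b < d2->b) ? -1 : 1;
	}
	return 1;
}

/* usage: qsort(list, count, sizeof(coalesce_block_t), coalesce_block_cmp); */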
	insn->op1 = insn->op2;

		if (p->pos == pos) {

		if (p->pos == load_pos) {
			p->hint_ref = insn->op1;

	if (insn->op2 > 0 && ctx->vregs[insn->op2]) {

		if (r->end == load_pos) {

			if (p->pos == load_pos) {

	uint8_t tmp = p1->flags;
	p1->flags = p2->flags;
	p2->flags = tmp;
static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def)

	if (ctx->vregs[insn->op1]
	 && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i])
	 && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) {

	if (ctx->vregs[insn->op2] && ctx->vregs[insn->op2] != ctx->vregs[i]) {

		ival->end = load_pos;

		if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i])
		 && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) {
			ir_swap_operands(ctx, i, insn);
	uint32_t b, n, succ, pred_b, count = 0;

	for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {

			if (insn->op == IR_PHI) {

			if (insn->op == IR_PHI) {

				if (input > 0 && ctx->vregs[input]) {
					uint32_t v1 = ctx->vregs[input];
					uint32_t v2 = ctx->vregs[use];

					if (!ir_vregs_overlap(ctx, v1, v2)) {
						ir_vregs_coalesce(ctx, v1, v2, input, use);

						if (input_insn->op2 == use
						 && input_insn->op1 != use

							if (ir_vregs_overlap(ctx, v1, v2)) {

								ir_swap_operands(ctx, input, input_insn);
								IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2));
								ir_vregs_coalesce(ctx, v1, v2, input, use);

							ir_add_phi_move(ctx, b, input, use);

					ir_add_phi_move(ctx, b, input, use);
	ir_hint_propagation(ctx);

		uint32_t *rule = ctx->rules + 1;

			 && insn->op1 != insn->op2) {
				ir_try_swap_operands(ctx, i, insn);

			 && ctx->vregs[insn->op1]

				if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) {

					if (b1 && b1 != b2) {

					ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1);
	for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {

			if (insn->op == IR_PHI) {
				for (j = 2; j <= k; j++) {
	uint32_t succ, k, n = 0;
	ir_ref *loc, *pred, *src, *dst, i, *p, ref, input;
	bool have_constants_or_addresses = 0;

		if (insn->op == IR_PHI) {

				have_constants_or_addresses = 1;
			} else if (ctx->vregs[input] != ctx->vregs[ref]) {

				d = ctx->vregs[ref];

				loc[d] = pred[s] = 0;

	src[0] = dst[0] = 0;

			emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b]);

			if (a == c && pred[a]) {

			emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0);

	if (have_constants_or_addresses) {

			if (insn->op == IR_PHI) {

					emit_copy(ctx, insn->type, input, ref);
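/*
 * Editor's note -- an assumption about intent: the loc/pred bookkeeping and
 * the emit_copy(..., src[b], 0) call above sequentialize the *parallel*
 * copies implied by PHIs on a CFG edge, using location 0 as a scratch
 * temporary to break copy cycles. The classic cycle case, as a tiny
 * standalone demo with hypothetical names:
 */
#include <stdio.h>

static void copy(const char *from, const char *to)
{
	printf("MOV %s -> %s\n", from, to);
}

/* Parallel copy a<-b, b<-c, c<-a is a cycle; one temp makes it sequential. */
static void demo_cycle(void)
{
	copy("a", "tmp");                /* park a's old value */
	copy("b", "a");
	copy("c", "b");
	copy("tmp", "c");                /* c receives a's old value */
}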
#ifdef IR_DEBUG
# define IR_LOG_LSRA(action, ival, comment) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = _ival->end; \
			fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \
				(_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \
		} \
	} while (0)
# define IR_LOG_LSRA_ASSIGN(action, ival, comment) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = _ival->end; \
			fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \
				(_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
				ir_reg_name(_ival->reg, _ival->type)); \
		} \
	} while (0)
# define IR_LOG_LSRA_SPLIT(ival, pos) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = _ival->end; \
			ir_live_pos _pos = (pos); \
			fprintf(stderr, " ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \
				(_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
				IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
		} \
	} while (0)
# define IR_LOG_LSRA_CONFLICT(action, ival, pos) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = _ival->end; \
			ir_live_pos _pos = (pos); \
			fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \
				(_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
				ir_reg_name(_ival->reg, _ival->type), \
				IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
		} \
	} while (0)
#else
# define IR_LOG_LSRA(action, ival, comment)
# define IR_LOG_LSRA_ASSIGN(action, ival, comment)
# define IR_LOG_LSRA_SPLIT(ival, pos)
# define IR_LOG_LSRA_CONFLICT(action, ival, pos)
#endif
		if (position < live_range->end) {
			return position >= live_range->start;
		}
		live_range = live_range->next;
	} while (live_range);
	if (from < r->start) {

	} else if (to <= r->end) {

	while (p && p->pos <= pos) {

	while (p && p->pos < pos) {

	if (p && p->pos == pos && p->op_num != 0) {

	while (p && !(p->flags & flags)) {

	return p ? p->pos : 0x7fffffff;

	while (p && !(p->flags & flags)) {

	return p ? p->pos : 0x7fffffff;

	uint32_t b = ctx->cfg_map[ref];

	if (min_pos == max_pos) {

	min_bb = ir_block_from_live_pos(ctx, min_pos);
	max_bb = ir_block_from_live_pos(ctx, max_pos);

	if (min_bb == max_bb
	 || ir_ival_has_hole_between(ival, min_pos, max_pos)) {
		return (prefer_max) ? max_pos : min_pos;
	while (p && pos >= p->end) {

	prev_use_pos = NULL;

	if (p->start == pos) {
		while (use_pos && pos > use_pos->pos) {

			prev_use_pos = use_pos;
			use_pos = use_pos->next;

		while (use_pos && pos >= use_pos->pos) {

			prev_use_pos = use_pos;
			use_pos = use_pos->next;

	child->use_pos = prev_use_pos ? prev_use_pos->next : use_pos;

	if (pos == p->start) {

		use_pos = use_pos->next;
	} else if (size == 8) {

	} else if (size == 4) {
		if (data->unused_slot_4) {
			ret = data->unused_slot_4;
			data->unused_slot_4 = 0;
		} else if (data->handled && data->handled[8]) {
			ret = data->handled[8]->stack_spill_pos;
			data->handled[8] = data->handled[8]->list_next;
			data->unused_slot_4 = ret + 4;

		if (sizeof(void*) == 8) {

	} else if (size == 2) {
		if (data->unused_slot_2) {
			ret = data->unused_slot_2;
			data->unused_slot_2 = 0;
		} else if (data->unused_slot_4) {
			ret = data->unused_slot_4;
			data->unused_slot_2 = data->unused_slot_4 + 2;
			data->unused_slot_4 = 0;
		} else if (data->handled && data->handled[4]) {
			ret = data->handled[4]->stack_spill_pos;
			data->handled[4] = data->handled[4]->list_next;
			data->unused_slot_2 = ret + 2;
		} else if (data->handled && data->handled[8]) {
			ret = data->handled[8]->stack_spill_pos;
			data->handled[8] = data->handled[8]->list_next;
			data->unused_slot_2 = ret + 2;
			data->unused_slot_4 = ret + 4;

		if (sizeof(void*) == 8) {

	} else if (size == 1) {
		if (data->unused_slot_1) {
			ret = data->unused_slot_1;
			data->unused_slot_1 = 0;
		} else if (data->unused_slot_2) {
			ret = data->unused_slot_2;
			data->unused_slot_1 = data->unused_slot_2 + 1;
			data->unused_slot_2 = 0;
		} else if (data->unused_slot_4) {
			ret = data->unused_slot_4;
			data->unused_slot_1 = data->unused_slot_4 + 1;
			data->unused_slot_2 = data->unused_slot_4 + 2;
			data->unused_slot_4 = 0;
		} else if (data->handled && data->handled[2]) {
			ret = data->handled[2]->stack_spill_pos;
			data->handled[2] = data->handled[2]->list_next;
			data->unused_slot_1 = ret + 1;
		} else if (data->handled && data->handled[4]) {
			ret = data->handled[4]->stack_spill_pos;
			data->handled[4] = data->handled[4]->list_next;
			data->unused_slot_1 = ret + 1;
			data->unused_slot_2 = ret + 2;
		} else if (data->handled && data->handled[8]) {
			ret = data->handled[8]->stack_spill_pos;
			data->handled[8] = data->handled[8]->list_next;
			data->unused_slot_1 = ret + 1;
			data->unused_slot_2 = ret + 2;
			data->unused_slot_4 = ret + 4;

		if (sizeof(void*) == 8) {

	return ir_allocate_small_spill_slot(ctx, size, data);
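/*
 * Editor's sketch -- a simplified model, not the ir_ra.c API: the long chain
 * above reuses freed spill slots by carving them up, e.g. an expired 8-byte
 * slot (popped from data->handled[8]) serves a 4-byte request and leaves its
 * upper half in unused_slot_4, and so on down to 1 byte. A standalone pool
 * with hypothetical names; `top` must start at a nonzero base so that offset
 * 0 can mean "no free sub-slot", and an overwritten unused_* offset is simply
 * leaked, as in a best-effort allocator:
 */
#include <stdint.h>

typedef struct {
	int32_t top;                     /* next fresh offset; initialize nonzero */
	int32_t unused4, unused2, unused1;
} slot_pool_t;

static int32_t slot_alloc(slot_pool_t *p, int size)  /* size: 1, 2, 4 or 8 */
{
	int32_t ret;

	if (size == 4 && p->unused4) {
		ret = p->unused4; p->unused4 = 0;
	} else if (size == 2 && p->unused2) {
		ret = p->unused2; p->unused2 = 0;
	} else if (size == 2 && p->unused4) {
		ret = p->unused4;            /* split a free 4-byte slot */
		p->unused2 = ret + 2;
		p->unused4 = 0;
	} else if (size == 1 && p->unused1) {
		ret = p->unused1; p->unused1 = 0;
	} else if (size == 1 && p->unused2) {
		ret = p->unused2;            /* split a free 2-byte slot */
		p->unused1 = ret + 1;
		p->unused2 = 0;
	} else if (size == 1 && p->unused4) {
		ret = p->unused4;            /* split a free 4-byte slot */
		p->unused1 = ret + 1;
		p->unused2 = ret + 2;
		p->unused4 = 0;
	} else {
		ret = p->top;                /* carve a fresh, aligned 8-byte slot */
		p->top += 8;
		if (size <= 4) p->unused4 = ret + 4;
		if (size <= 2) p->unused2 = ret + 2;
		if (size == 1) p->unused1 = ret + 1;
	}
	return ret;
}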
		reg = use_pos->hint;
		if (reg >= 0 && IR_REGSET_IN(available, reg)) {

		use_pos = use_pos->next;

		reg = use_pos->hint;
		if (reg >= 0 && IR_REGSET_IN(available, reg)) {
			if (ival->end <= freeUntilPos[reg]) {

		use_pos = use_pos->next;

			if (reg >= 0 && IR_REGSET_IN(available, reg)) {
				if (ival->end <= freeUntilPos[reg]) {

		use_pos = use_pos->next;

		reg = use_pos->hint;
		if (reg >= 0 && IR_REGSET_IN(available, reg)) {

		} else if (use_pos->hint_ref > 0) {

			if (reg >= 0 && IR_REGSET_IN(available, reg)) {

		use_pos = use_pos->next;
	if (*unhandled == NULL
	 || pos < (*unhandled)->range.start
	 || (pos == (*unhandled)->range.start

	 || (pos == (*unhandled)->range.start
	  && ival->vreg > (*unhandled)->vreg)) {

		while (prev->list_next) {

			 || (pos == prev->list_next->range.start

			 || (pos == prev->list_next->range.start
			  && ival->vreg > prev->list_next->vreg)) {

		prev->list_next = ival;

	if (*unhandled == NULL) {

	while (*prev && pos >= (*prev)->range.start) {
		prev = &(*prev)->list_next;

	if (*unhandled == NULL
	 || pos <= (*unhandled)->range.start) {

		while (prev->list_next) {

		prev->list_next = ival;
	ir_regset available, overlapped, scratch;

		freeUntilPos[i] = 0x7fffffff;

#if defined(IR_TARGET_X86)

		freeUntilPos[i] = 0x7fffffff;

	available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);

		available = IR_REGSET_EMPTY;

			IR_REGSET_EXCL(available, reg);

	overlapped = IR_REGSET_EMPTY;

				overlapped = IR_REGSET_UNION(overlapped, regset);
				IR_REGSET_FOREACH(regset, reg) {
					if (next < freeUntilPos[reg]) {
						freeUntilPos[reg] = next;
					}
				} IR_REGSET_FOREACH_END();
			} else if (IR_REGSET_IN(available, reg)) {
				IR_REGSET_INCL(overlapped, reg);
				if (next < freeUntilPos[reg]) {
					freeUntilPos[reg] = next;
				}

	available = IR_REGSET_DIFFERENCE(available, overlapped);
	if (available != IR_REGSET_EMPTY) {

		reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos);

			if (*unhandled && ival->end > (*unhandled)->range.start) {

		if (reg >= 0 && IR_REGSET_IN(available, reg)) {

			if (*unhandled && ival->end > (*unhandled)->range.start) {

		if (scratch != IR_REGSET_EMPTY) {

				ir_regset non_conflicting = scratch;

						reg = ir_get_first_reg_hint(ctx, other, non_conflicting);

							IR_REGSET_EXCL(non_conflicting, reg);
							if (non_conflicting == IR_REGSET_EMPTY) {

				if (non_conflicting != IR_REGSET_EMPTY) {
					reg = IR_REGSET_FIRST(non_conflicting);

					reg = IR_REGSET_FIRST(scratch);

				reg = IR_REGSET_FIRST(scratch);

			reg = IR_REGSET_FIRST(available);

		if (*unhandled && ival->end > (*unhandled)->range.start) {

	IR_REGSET_FOREACH(overlapped, i) {
		if (freeUntilPos[i] > pos) {
			pos = freeUntilPos[i];

		} else if (freeUntilPos[i] == pos

			pos = freeUntilPos[i];

	} IR_REGSET_FOREACH_END();

	split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, pos, 0);
	other = ir_split_interval_at(ctx, ival, split_pos);

	ir_reg pref_reg = ir_try_allocate_preferred_reg(ctx, ival, IR_REGSET_UNION(available, overlapped), freeUntilPos);

		ival->reg = pref_reg;

	IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling for the first part)");
	if (*unhandled && ival->end > (*unhandled)->range.start) {

	ir_add_to_unhandled(unhandled, other);
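/*
 * Editor's sketch of the decision the code above makes (hypothetical names,
 * simplified to plain ints): every candidate register r is free up to
 * freeUntilPos[r]; take the one that stays free longest. If even that
 * register is only free for a prefix of the interval, the interval is split
 * there and the tail is re-queued as unhandled.
 */
static int pick_free_reg(const int *free_until_pos, int nregs,
                         int ival_end, int *split_pos)
{
	int best = 0;

	for (int r = 1; r < nregs; r++) {
		if (free_until_pos[r] > free_until_pos[best]) {
			best = r;
		}
	}
	if (free_until_pos[best] == 0) {
		return -1;                   /* nothing is free: caller must spill */
	}
	*split_pos = (free_until_pos[best] < ival_end)
		? free_until_pos[best]       /* register free only for a prefix */
		: 0;                         /* 0: fits entirely, no split needed */
	return best;
}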
	ir_regset available, tmp_regset;

		use_pos = use_pos->next;

		IR_LOG_LSRA(" ---- Spill", ival, " (no use pos that must be in reg)");

		next_use_pos = use_pos->pos;

		nextUsePos[i] = 0x7fffffff;
		blockPos[i] = 0x7fffffff;

#if defined(IR_TARGET_X86)

		nextUsePos[i] = 0x7fffffff;
		blockPos[i] = 0x7fffffff;

	available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);

	if (IR_REGSET_IS_EMPTY(available)) {
		fprintf(stderr, "LSRA Internal Error: No registers available. Allocation is not possible\n");

				IR_REGSET_FOREACH(regset, reg) {
					blockPos[reg] = nextUsePos[reg] = 0;
				} IR_REGSET_FOREACH_END();
			} else if (IR_REGSET_IN(available, reg)) {

				blockPos[reg] = nextUsePos[reg] = 0;

				if (pos < nextUsePos[reg]) {
					nextUsePos[reg] = pos;

				IR_REGSET_FOREACH(regset, reg) {
					if (overlap < nextUsePos[reg]) {
						nextUsePos[reg] = overlap;
					}
					if (overlap < blockPos[reg]) {
						blockPos[reg] = overlap;
					}
				} IR_REGSET_FOREACH_END();

			} else if (IR_REGSET_IN(available, reg)) {

					if (overlap < nextUsePos[reg]) {
						nextUsePos[reg] = overlap;
					}
					if (overlap < blockPos[reg]) {
						blockPos[reg] = overlap;
					}

					if (pos < nextUsePos[reg]) {
						nextUsePos[reg] = pos;

	reg = ir_get_preferred_reg(ctx, ival, available);

		reg = IR_REGSET_FIRST(available);

		pos = nextUsePos[reg];
		tmp_regset = available;
		IR_REGSET_EXCL(tmp_regset, reg);
		IR_REGSET_FOREACH(tmp_regset, i) {
			if (nextUsePos[i] > pos) {
				pos = nextUsePos[i];

		} IR_REGSET_FOREACH_END();

			split_pos = next_use_pos + 1;

			split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);

			IR_LOG_LSRA(" ---- Conflict with others", ival, " (all others are used before)");
			other = ir_split_interval_at(ctx, ival, split_pos);

			ir_add_to_unhandled(unhandled, other);

	if (ival->end > blockPos[reg]) {

		IR_LOG_LSRA(" ---- Conflict with others", ival, " (spilling makes a register free only for the first part)");

		ir_live_pos split_pos = ir_last_use_pos_before(ival, blockPos[reg] + 1,

		if (split_pos == 0) {
			split_pos = ir_first_use_pos_after(ival, blockPos[reg],

			other = ir_split_interval_at(ctx, ival, split_pos);
			ir_add_to_unhandled(unhandled, other);

			if (split_pos >= blockPos[reg]) {
try_next_available_register:
				IR_REGSET_EXCL(available, reg);
				if (IR_REGSET_IS_EMPTY(available)) {
					fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n");

				goto select_register;

			split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, blockPos[reg], 1);
			other = ir_split_interval_at(ctx, ival, split_pos);
			ir_add_to_unhandled(unhandled, other);

		if (reg == other->reg) {

			if (split_pos == 0) {

			split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1);

			child = ir_split_interval_at(ctx, other, split_pos);

					split_pos = next_use_pos + 1;

					split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);

				goto try_next_available_register;

				IR_LOG_LSRA(" ---- Spill and Finish", other, " (it must not be in reg)");

				if (split_pos > child->range.start && split_pos < child->end) {
					ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1);

						split_pos = opt_split_pos;

					child2 = ir_split_interval_at(ctx, child, split_pos);

					ir_add_to_unhandled(unhandled, child2);

				} else if (child != other) {

					ir_add_to_unhandled(unhandled, child);

		if (reg == other->reg) {

			child = ir_split_interval_at(ctx, other, overlap);

			ir_add_to_unhandled(unhandled, child);

	if (*unhandled && ival->end > (*unhandled)->range.start) {
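/*
 * Editor's sketch (hypothetical names): when no register is free, the code
 * above applies the classic linear-scan heuristic -- evict the occupant
 * whose next use position is furthest away (max over nextUsePos[]), because
 * its owner can tolerate a spill longest.
 */
static int pick_spill_reg(const int *next_use_pos, int nregs)
{
	int best = 0;

	for (int r = 1; r < nregs; r++) {
		if (next_use_pos[r] > next_use_pos[best]) {
			best = r;                /* furthest next use loses its register */
		}
	}
	return best;
}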
	} else if (from != 0) {

	if (!ir_has_tmp(ctx, bb->end, tmp_reg.num)) {
		ir_add_tmp(ctx, bb->end, bb->end, tmp_reg.num, tmp_reg);

		use_pos = use_pos->next;

		ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);

		use_pos = use_pos->next;

		ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);

	for (; n > 0; p++, n--) {

		if (ctx->ir_base[use].op == IR_VSTORE) {

		} else if (ctx->ir_base[use].op == IR_VADDR) {
static void ir_assign_bound_spill_slots(ir_ctx *ctx)

static int ir_linear_scan(ir_ctx *ctx)

	for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {

	data.unused_slot_4 = 0;
	data.unused_slot_2 = 0;
	data.unused_slot_1 = 0;

		IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA);

		if (insn->op == IR_VAR) {

			for (; n > 0; p++, n--) {

				if (insn->op == IR_VADDR) {

			insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32, &data);

		 || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) {
			ir_add_to_unhandled(&unhandled, ival);

		ir_merge_to_unhandled(&unhandled, ival);

	if (ctx->flags & IR_DEBUG_RA) {
		fprintf(stderr, "---- Start LSRA\n");

			if (r->end <= position) {

			} while (r && r->end <= position);

			prev->list_next = other;

			if (position < r->start) {

			if (r->end <= position) {

			} while (r && r->end <= position);

			prev->list_next = other;

			if (position >= r->start) {

		other = prev ? prev->list_next : inactive;

		reg = ir_try_allocate_free_reg(ctx, ival, &active, inactive, &unhandled);

			reg = ir_allocate_blocked_reg(ctx, ival, &active, &inactive, &unhandled);

	ir_assign_bound_spill_slots(ctx);

		while (other->next) {
			other = other->next;

		ir_add_to_unhandled_spill(&unhandled, ival);

	data.handled = handled;

			if (other->end <= position) {

				old = handled[size];

				handled[size] = other;

			old = handled[size];

			handled[size] = ival;

	if (ctx->flags & IR_DEBUG_RA) {
		fprintf(stderr, "---- Finish LSRA\n");
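/*
 * Editor's note -- the textbook shape of the driver loop above, as a hedged,
 * self-contained sketch (hypothetical ival_t, a fixed register file, and
 * "spill the current interval" instead of ir_allocate_blocked_reg's
 * split-based strategy):
 */
#include <stddef.h>

enum { NREGS = 4 };

typedef struct ival {
	int start, end;                  /* half-open [start, end) */
	int reg;                         /* assigned register, -1 = spilled */
	struct ival *next;
} ival_t;

/* `unhandled` must be sorted by ascending start position. */
static void linear_scan(ival_t *unhandled)
{
	ival_t *active = NULL;

	while (unhandled) {
		ival_t *ival = unhandled;
		int used[NREGS] = {0};

		unhandled = ival->next;
		/* Expire active intervals that end at or before this start,
		 * and collect the registers still held by the survivors. */
		for (ival_t **pp = &active; *pp; ) {
			if ((*pp)->end <= ival->start) {
				*pp = (*pp)->next;
			} else {
				used[(*pp)->reg] = 1;
				pp = &(*pp)->next;
			}
		}
		ival->reg = -1;
		for (int r = 0; r < NREGS; r++) {
			if (!used[r]) {
				ival->reg = r;       /* first free register wins */
				break;
			}
		}
		if (ival->reg >= 0) {
			ival->next = active;     /* keep allocated intervals active */
			active = ival;
		}
	}
}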
		use_pos = use_pos->next;

static void assign_regs(ir_ctx *ctx)

	int8_t reg, old_reg;

	ir_regset used_regs = 0;

				IR_REGSET_INCL(used_regs, reg);

				use_pos = use_pos->next;

			IR_REGSET_INCL(used_regs, ival->reg);

					use_pos = use_pos->next;

			IR_REGSET_INCL(used_regs, ival->reg);

				if (use_pos->op_num == 0) {
					if ((ctx->ir_base[ref].op == IR_COPY
					  || ctx->ir_base[ref].op == IR_BITCAST
					  || ctx->ir_base[ref].op == IR_TRUNC)

					use_pos = use_pos->next;

				if (ctx->ir_base[ref].op == IR_PHI) {

				} else if (ctx->ir_base[ref].op == IR_PARAM

					uint32_t use_b = ctx->cfg_map[ref];

				} else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
					&& needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {

					 && use_pos->hint != reg

					 && ctx->ir_base[ref].op != IR_SNAPSHOT
					 && !needs_spill_load(ctx, ival, use_pos)) {

				use_pos = use_pos->next;

					uint32_t use_b = ctx->cfg_map[ref];

					if (reg != old_reg) {

				use_pos = use_pos->next;

				if (reg != old_reg) {

				use_pos = use_pos->next;

					use_pos = use_pos->next;

				if (ctx->ir_base[ref].op == IR_SNAPSHOT) {

					use_pos = use_pos->next;

			IR_REGSET_INCL(used_regs, ival->reg);

			if (ival->tmp_op_num <= insn->inputs_count) {

	if (ir_linear_scan(ctx)) {