#define IR_GCM_IS_SCHEDULED_EARLY(b) (((int32_t)(b)) < 0)
#define IR_GCM_EARLY_BLOCK(b) ((uint32_t)-((int32_t)(b)))
#define IR_SCHEDULE_SWAP_OPS 1
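/*
 * A minimal, self-contained sketch (not from the original file; demo name is
 * hypothetical) of the round trip these macros support: while a node is only
 * "early"-scheduled, its block number can be stored negated and decoded back.
 */
#include <assert.h>
#include <stdint.h>

static void gcm_early_block_demo(void)
{
	uint32_t b = 7;
	uint32_t marked = (uint32_t)-((int32_t)b);   /* IR_GCM_EARLY_BLOCK(b) */

	assert((int32_t)marked < 0);                 /* IR_GCM_IS_SCHEDULED_EARLY(marked) */
	assert((uint32_t)-((int32_t)marked) == b);   /* decoding restores the block */
}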
IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
n = insn->inputs_count;
for (p = insn->ops + 1; n > 0; p++, n--) {
b = ir_gcm_schedule_early(ctx, input, queue_late);
if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
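/*
 * Hedged sketch (toy arrays, hypothetical names, <stdint.h> types assumed;
 * not the IR API) of the rule the loop above implements: a floating node's
 * earliest legal block is the input placement with the greatest
 * dominator-tree depth, i.e. the lowest block still dominated by the blocks
 * of all inputs.
 */
static uint32_t earliest_block_demo(const uint32_t *input_block,
                                    const uint32_t *dom_depth,
                                    int input_count, uint32_t entry_block)
{
	uint32_t b = entry_block;   /* the entry block dominates everything */
	int i;

	for (i = 0; i < input_count; i++) {
		if (dom_depth[b] < dom_depth[input_block[i]]) {
			b = input_block[i];
		}
	}
	return b;
}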
static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2)
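/*
 * The body of ir_gcm_find_lca() is elided in this excerpt. A hedged,
 * self-contained sketch of the classic lowest-common-ancestor walk over a
 * dominator tree (toy idom/depth arrays, hypothetical names, <stdint.h>
 * types assumed):
 */
static uint32_t find_lca_demo(const uint32_t *idom, const uint32_t *dom_depth,
                              uint32_t b1, uint32_t b2)
{
	while (dom_depth[b1] > dom_depth[b2]) b1 = idom[b1];
	while (dom_depth[b2] > dom_depth[b1]) b2 = idom[b2];
	while (b1 != b2) {
		b1 = idom[b1];
		b2 = idom[b2];
	}
	return b1;
}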
static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
uint32_t flags, best, b;
if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) {
if (use_list->count == 1) {
if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
while (n && *p != b) {
} while (b != ctx->cfg_map[ref]);
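/*
 * Hedged sketch of the selection loop that ends with the do/while above
 * (toy arrays, hypothetical names, <stdint.h> types assumed): starting from
 * the late block (the LCA of all uses), walk up the dominator tree to the
 * early block and keep the candidate with the smallest loop nesting depth,
 * so code is hoisted out of loops but no further than necessary.
 */
static uint32_t select_best_block_demo(const uint32_t *idom,
                                       const uint32_t *loop_depth,
                                       uint32_t early, uint32_t lca)
{
	uint32_t b = lca, best = lca;

	for (;;) {
		if (loop_depth[b] < loop_depth[best]) {
			best = b;
		}
		if (b == early) {
			break;
		}
		b = idom[b];
	}
	return best;
}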
static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count);
if (insn->op == IR_PHI) {
ir_ref n = insn->inputs_count - 1;
for (; n > 0; p++, q++, n--) {
IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
if (i == b) return 0;
IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
if (i == b) return 0;
if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
fprintf(stderr, "*** Split partially dead node d_%d scheduled to BB%d\n", ref, b);
fprintf(stderr, "\td_%d is USED in [BB%d", ref, i);
if (_check_successors(ctx, bb, data)) {
_push_predecessors(ctx, bb, data);
if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
fprintf(stderr, "\td_%d is TOTALLY_USEFUL in [BB%d", ref, i);
uint32_t j, clone, clones_count = 0, uses_count = 0;
if (insn->op == IR_PHI) {
ir_ref n = insn->inputs_count - 1;
for (; n > 0; p++, q++, n--) {
clone = clones_count++;
clones[clone].block = j;
clones[clone].use_count = 0;
clones[clone].use = (uint32_t)-1;
uses[uses_count].ref = use;
uses[uses_count].block = i;
uses[uses_count].next = clones[clone].use;
clones[clone].use_count++;
clones[clone].use = uses_count++;
clone = clones_count++;
clones[clone].block = j;
clones[clone].use_count = 0;
clones[clone].use = -1;
uses[uses_count].ref = use;
uses[uses_count].block = i;
uses[uses_count].next = clones[clone].use;
clones[clone].use_count++;
clones[clone].use = uses_count++;
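/*
 * Hedged illustration of the bookkeeping in the two blocks above (toy structs,
 * hypothetical names, <stdint.h> types assumed): each clone heads an
 * index-linked singly linked list of its uses, so attaching a use is O(1) and
 * (uint32_t)-1 terminates the list.
 */
typedef struct { uint32_t block, use_count, use; } clone_demo_t;
typedef struct { uint32_t ref, block, next; } use_demo_t;

static void clone_add_use_demo(clone_demo_t *clone, use_demo_t *uses,
                               uint32_t *uses_count,
                               uint32_t use_ref, uint32_t use_block)
{
	uses[*uses_count].ref = use_ref;
	uses[*uses_count].block = use_block;
	uses[*uses_count].next = clone->use;   /* old head becomes the tail      */
	clone->use = (*uses_count)++;          /* new entry becomes the new head */
	clone->use_count++;
}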
if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
for (i = 0; i < clones_count; i++) {
uint32_t u = clones[i].use;
fprintf(stderr, "\tCLONE #%d in BB%d USES(%d)=[d_%d/BB%d",
	i, clones[i].block, clones[i].use_count, uses[u].ref, uses[u].block);
while (u != (uint32_t)-1) {
fprintf(stderr, ", d_%d/BB%d", uses[u].ref, uses[u].block);
for (i = 1; i < clones_count; i++) {
clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3);
if (insn->op1 > 0 && insn->inputs_count >= 1) ir_use_list_add(ctx, insn->op1, clone);
if (insn->op2 > 0 && insn->inputs_count >= 2) ir_use_list_add(ctx, insn->op2, clone);
if (insn->op3 > 0 && insn->inputs_count >= 3) ir_use_list_add(ctx, insn->op3, clone);
for (i = 0; i < clones_count; i++) {
clone = clones[i].ref;
if (clones[i].use_count == 1
clones[i].block = uses[clones[i].use].block;
ctx->cfg_map[clone] = clones[i].block;
uint32_t u = clones[i].use;
while (u != (uint32_t)-1) {
ir_ref k, l = insn->inputs_count;
if (insn->op == IR_PHI) {
for (k = 1; k <= l; k++) {
if (j != clones[i].block) {
if (j != clones[i].block) {
for (k = 1; k <= l; k++) {
if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2)
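/*
 * The body of ir_gcm_dominates() is elided here. A hedged sketch of the usual
 * test (toy arrays, hypothetical names, <stdint.h> types assumed): b1
 * dominates b2 iff walking b2 up the dominator tree, without going above
 * b1's depth, reaches b1.
 */
static int dominates_demo(const uint32_t *idom, const uint32_t *dom_depth,
                          uint32_t b1, uint32_t b2)
{
	while (dom_depth[b2] > dom_depth[b1]) {
		b2 = idom[b2];
	}
	return b1 == b2;
}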
static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b)
ir_gcm_schedule_late(ctx, use, b);
} else if (ctx->ir_base[use].op == IR_PHI) {
ir_ref n = insn->inputs_count - 1;
for (; n > 0; p++, q++, n--) {
lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
IR_ASSERT(lca != 0 && "No Common Ancestor");
IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the late");
&& ir_split_partially_dead_node(ctx, ref, lca)) {
if (lca != ctx->cfg_map[ref]) {
b = ir_gcm_select_best_block(ctx, ref, lca);
if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) {
if (ctx->ir_base[use].op == IR_OVERFLOW) {
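/*
 * Note on the two checks above: nodes in the IR_ADD_OV..IR_MUL_OV range
 * produce an overflow flag that an IR_OVERFLOW user reads, so their placement
 * appears to be constrained to stay with that user rather than being hoisted
 * like ordinary arithmetic. The surrounding control flow is elided in this
 * excerpt.
 */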
uint32_t *_blocks, b;
if (insn->inputs_count > 1) {
if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
n = insn->inputs_count;
for (p = insn->ops + 1; n > 0; p++, n--) {
if (ref > 0 && _blocks[ref] == 0) {
for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
if (insn->inputs_count > 1) {
while (ref != bb->start) {
if (insn->inputs_count > 1) {
if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
} else if (use_insn->op == IR_PARAM) {
} else if (use_insn->op == IR_VAR) {
k = insn->inputs_count - 1;
for (p = insn->ops + 2; k > 0; p++, k--) {
if (ref > 0 && _blocks[ref] == 0) {
ir_gcm_schedule_early(ctx, ref, &queue_late);
if (ctx->flags & IR_DEBUG_GCM) {
fprintf(stderr, "GCM Schedule Early\n");
ir_gcm_schedule_late(ctx, ref, b);
if (ctx->flags & IR_DEBUG_GCM) {
fprintf(stderr, "GCM Schedule Late\n");
uint32_t n1, n2, pos;
uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);
memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
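/*
 * Hedged reading of the memset above: binding->mask appears to hold the
 * negated hash size, with the uint32_t bucket heads laid out immediately
 * before binding->data, so filling that prefix with 0xFF marks every bucket
 * as empty ((uint32_t)-1) before the bindings are re-inserted with translated
 * references.
 */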
ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
uint32_t *_blocks = ctx->cfg_map;
_prev[prev_b_end] = 0;
if (b == prev_b && i <= prev_b_end) {
} else if (b > prev_b) {
if (i == bb->start) {
prev_b_end = bb->end;
_next[i] = _move_down;
if (_prev[bb->end]) {
_move_down = _next[i];
k = _next[bb->start];
while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "Before Schedule\n");
for (i = 1; i != 0; i = _next[i]) {
fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
_xlat[i] = bb->start = insns_count;
if (insn->op == IR_CASE_VAL) {
n = insn->inputs_count;
while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
_xlat[i] = insns_count;
while (insn->op == IR_PHI) {
_xlat[i] = insns_count;
for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
if (!_xlat[use] && (_blocks[use] || use_insn->op == IR_PARAM)) {
IR_ASSERT(_blocks[use] == b || use_insn->op == IR_PARAM);
if (use_insn->op == IR_PARAM
 || use_insn->op == IR_VAR
 || use_insn->op == IR_PI
 || use_insn->op == IR_PHI) {
if (_prev[use] != phis) {
_prev[_next[use]] = _prev[use];
_next[_prev[use]] = _next[use];
_next[use] = _next[phis];
_prev[_next[phis]] = use;
_xlat[use] = insns_count;
if (use_insn->op == IR_PHI) {
for (j = n, q = use_insn->ops + 2; j > 0; q++, j--) {
if (end->op == IR_IF) {
if (input > 0 && _blocks[input] == b && !_xlat[input] && _prev[j] != input) {
_prev[_next[input]] = _prev[input];
_next[_prev[input]] = _next[input];
_prev[input] = _prev[j];
_next[_prev[j]] = input;
while (i != bb->end) {
n = insn->inputs_count;
for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
if (!_xlat[input]) {
if (_blocks[input] == b) {
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
_prev[_next[input]] = _prev[input];
_next[_prev[input]] = _next[input];
_prev[input] = _prev[i];
_next[_prev[i]] = input;
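/*
 * Hedged sketch of the array-backed doubly linked list operation used in the
 * unlink/relink pairs above (toy arrays, hypothetical names): remove node x
 * from the instruction order and re-insert it immediately before node pos.
 */
static void list_move_before_demo(int *_next, int *_prev, int x, int pos)
{
	/* unlink x from its current position */
	_prev[_next[x]] = _prev[x];
	_next[_prev[x]] = _next[x];
	/* re-link x just before pos */
	_prev[x] = _prev[pos];
	_next[x] = pos;
	_next[_prev[pos]] = x;   /* old predecessor of pos now points to x */
	_prev[pos] = x;
}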
_xlat[i] = insns_count;
_xlat[i] = bb->end = insns_count;
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "After Schedule\n");
for (i = 1; i != 0; i = _next[i]) {
fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]);
for (i = 1; i != 0; i = _next[i]) {
if (_xlat[i] != i) {
ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
#if defined(IR_TARGET_AARCH64)
new_ctx.deoptimization_exits = ctx->deoptimization_exits;
new_ctx.get_exit_addr = ctx->get_exit_addr;
new_ctx.get_veneer = ctx->get_veneer;
new_ctx.set_veneer = ctx->set_veneer;
ref = 1 - consts_count;
new_insn = &new_ctx.ir_base[ref];
if (new_insn->op == IR_FUNC_ADDR) {
if (new_insn->proto) {
new_insn->proto = ir_strl(&new_ctx, proto, len);
} else if (new_insn->op == IR_FUNC) {
if (new_insn->proto) {
new_insn->proto = ir_strl(&new_ctx, proto, len);
} else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) {
new_insn = &new_ctx.ir_base[new_ref];
new_insn->optx = insn->optx;
new_insn->prev_const = 0;
if (insn->op == IR_FUNC_ADDR) {
new_insn->proto = ir_strl(&new_ctx, proto, len);
new_insn->proto = 0;
} else if (insn->op == IR_FUNC) {
new_insn->proto = ir_strl(&new_ctx, proto, len);
new_insn->proto = 0;
} else if (insn->op == IR_SYM || insn->op == IR_STR) {
_xlat[ref] = new_ref;
use_edges_count = 0;
for (i = 1; i != 0; i = _next[i]) {
new_ctx.cfg_map[new_ref] = _blocks[i];
_prev[new_ref] = prev_ref;
*edges = _xlat[ref];
*edges = _xlat[ref];
new_list = &lists[new_ref];
new_list->refs = use_edges_count;
use_edges_count += k;
new_list->count = k;
new_insn = &new_ctx.ir_base[new_ref];
new_insn->optx = insn->optx;
n = new_insn->inputs_count;
new_insn->op1 = insn->op1;
new_insn->op2 = insn->op2;
new_insn->op3 = insn->op3;
new_insn->op1 = _xlat[insn->op1];
if (new_insn->op == IR_PARAM || insn->op == IR_VAR) {
} else if (new_insn->op == IR_PROTO) {
new_insn->op2 = ir_strl(&new_ctx, proto, len);
new_insn->op2 = insn->op2;
new_insn->op3 = insn->op3;
new_insn->op1 = _xlat[insn->op1];
new_insn->op2 = _xlat[insn->op2];
new_insn->op3 = insn->op3;
#if IR_SCHEDULE_SWAP_OPS
if (new_insn->op1 < new_insn->op2) {
switch (new_insn->op) {
SWAP_REFS(new_insn->op1, new_insn->op2);
SWAP_REFS(new_insn->op1, new_insn->op2);
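/*
 * Hedged note on the two SWAP_REFS() uses above: when IR_SCHEDULE_SWAP_OPS is
 * enabled and op1 ended up with a smaller (earlier) reference than op2 after
 * renumbering, operands of the opcodes listed in the elided switch cases are
 * exchanged. This canonicalizes operand order for later passes; which opcodes
 * are treated as plainly commutative and which need extra adjustment is not
 * visible in this excerpt.
 */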
new_insn->op1 = _xlat[insn->op1];
new_insn->op2 = _xlat[insn->op2];
new_insn->op3 = _xlat[insn->op3];
for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
insn->op1 = ref = _xlat[ref];
insn->op3 = ref = _xlat[ref];
ir_xlat_binding(ctx, _xlat);
for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {