php-internal-docs 8.4.8
Unofficial docs for php/php-src
ir_cfg.c
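Source listing of the CFG module of the IR JIT framework used by php-src. The file builds the control flow graph from the IR instruction stream (ir_build_cfg), removes unreachable blocks, computes the dominator tree (ir_build_dominators_tree), identifies loops (ir_find_loops), and orders basic blocks for code generation (ir_schedule_blocks with bottom-up and top-down variants).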
1/*
2 * IR - Lightweight JIT Compilation Framework
3 * (CFG - Control Flow Graph)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 */
7
8#include "ir.h"
9#include "ir_private.h"
10
11static int ir_remove_unreachable_blocks(ir_ctx *ctx);
12
13IR_ALWAYS_INLINE void _ir_add_successors(const ir_ctx *ctx, ir_ref ref, ir_worklist *worklist)
14{
15 ir_use_list *use_list = &ctx->use_lists[ref];
16 ir_ref *p, use, n = use_list->count;
17
18 if (n < 2) {
19 if (n == 1) {
20 use = ctx->use_edges[use_list->refs];
22 ir_worklist_push(worklist, use);
23 }
24 } else {
25 p = &ctx->use_edges[use_list->refs];
26 if (n == 2) {
27 use = *p;
29 ir_worklist_push(worklist, use);
30 use = *(p + 1);
32 ir_worklist_push(worklist, use);
33 } else {
34 for (; n > 0; p++, n--) {
35 use = *p;
37 ir_worklist_push(worklist, use);
38 }
39 }
40 }
41}
42
43IR_ALWAYS_INLINE void _ir_add_predecessors(const ir_insn *insn, ir_worklist *worklist)
44{
45 ir_ref n, ref;
46 const ir_ref *p;
47
48 if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
49 n = insn->inputs_count;
50 for (p = insn->ops + 1; n > 0; p++, n--) {
51 ref = *p;
52 IR_ASSERT(ref);
53 ir_worklist_push(worklist, ref);
54 }
55 } else if (insn->op != IR_START) {
56 if (EXPECTED(insn->op1)) {
57 ir_worklist_push(worklist, insn->op1);
58 }
59 }
60}
61
62void ir_reset_cfg(ir_ctx *ctx)
63{
64 ctx->cfg_blocks_count = 0;
65 ctx->cfg_edges_count = 0;
66 if (ctx->cfg_blocks) {
67 ir_mem_free(ctx->cfg_blocks);
68 ctx->cfg_blocks = NULL;
69 if (ctx->cfg_edges) {
70 ir_mem_free(ctx->cfg_edges);
71 ctx->cfg_edges = NULL;
72 }
73 if (ctx->cfg_map) {
74 ir_mem_free(ctx->cfg_map);
75 ctx->cfg_map = NULL;
76 }
77 }
78}
79
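/*
 * ir_build_cfg() discovers basic blocks in two phases: a backward DFS from the
 * terminator ("stop") nodes chained through op3 starting at ir_base[1].op1, and a
 * forward DFS that picks up blocks reachable only through IF/SWITCH successors or
 * infinite loops. It then fills the cfg_blocks/cfg_edges/cfg_map arrays and, unless
 * SCCP already guaranteed reachability (IR_CFG_REACHABLE), drops unreachable blocks.
 */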
80int ir_build_cfg(ir_ctx *ctx)
81{
82 ir_ref n, *p, ref, start, end;
83 uint32_t b;
84 ir_insn *insn;
85 ir_worklist worklist;
86 uint32_t bb_init_falgs;
87 uint32_t count, bb_count = 0;
88 uint32_t edges_count = 0;
89 ir_block *blocks, *bb;
90 uint32_t *_blocks, *edges;
91 ir_use_list *use_list;
92 uint32_t len = ir_bitset_len(ctx->insns_count);
93 ir_bitset bb_starts = ir_mem_calloc(len * 2, IR_BITSET_BITS / 8);
94 ir_bitset bb_leaks = bb_starts + len;
95 _blocks = ir_mem_calloc(ctx->insns_limit, sizeof(uint32_t));
96 ir_worklist_init(&worklist, ctx->insns_count);
97
98 /* First try to perform backward DFS search starting from "stop" nodes */
99
100 /* Add all "stop" nodes */
101 ref = ctx->ir_base[1].op1;
102 while (ref) {
103 ir_worklist_push(&worklist, ref);
104 ref = ctx->ir_base[ref].op3;
105 }
106
107 while (ir_worklist_len(&worklist)) {
108 ref = ir_worklist_pop(&worklist);
109 insn = &ctx->ir_base[ref];
110
111 if (insn->op == IR_NOP) {
112 continue;
113 }
114 IR_ASSERT(IR_IS_BB_END(insn->op));
115 /* Remember BB end */
116 end = ref;
117 /* Some successors of IF and SWITCH nodes may be inaccessible by backward DFS */
118 use_list = &ctx->use_lists[end];
119 n = use_list->count;
120 if (n > 1 || (n == 1 && (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) != 0)) {
121 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
122 /* Remember possible inaccessible successors */
123 ir_bitset_incl(bb_leaks, *p);
124 }
125 }
126 /* Skip control nodes until BB start */
127 ref = insn->op1;
128 while (1) {
129 insn = &ctx->ir_base[ref];
130 if (IR_IS_BB_START(insn->op)) {
131 break;
132 }
133 ref = insn->op1; // follow connected control nodes until BB start
134 }
135 /* Mark BB Start */
136 bb_count++;
137 _blocks[ref] = end;
138 ir_bitset_incl(bb_starts, ref);
139 /* Add predecessors */
140 _ir_add_predecessors(insn, &worklist);
141 }
142
143 /* A backward DFS may miss some branches that end in infinite loops. */
144 /* Try a forward DFS (in most cases all nodes have already been processed). */
145
146 /* START node may be inaccessible from "stop" nodes */
147 ir_bitset_incl(bb_leaks, 1);
148
149 /* Add the unprocessed START node and the successors of IF and SWITCH nodes */
150 IR_BITSET_FOREACH_DIFFERENCE(bb_leaks, bb_starts, len, start) {
151 ir_worklist_push(&worklist, start);
152 } IR_BITSET_FOREACH_END();
153
154 if (ir_worklist_len(&worklist)) {
155 ir_bitset_union(worklist.visited, bb_starts, len);
156 do {
157 ref = ir_worklist_pop(&worklist);
158 insn = &ctx->ir_base[ref];
159
160 if (insn->op == IR_NOP) {
161 continue;
162 }
163 IR_ASSERT(IR_IS_BB_START(insn->op));
164 /* Remember BB start */
165 start = ref;
166 /* Skip control nodes until BB end */
167 while (1) {
168 ref = ir_next_control(ctx, ref);
169 insn = &ctx->ir_base[ref];
170 if (IR_IS_BB_END(insn->op)) {
171 break;
172 }
173 }
174 /* Mark BB Start */
175 bb_count++;
176 _blocks[start] = ref;
177 ir_bitset_incl(bb_starts, start);
178 /* Add successors */
179 _ir_add_successors(ctx, ref, &worklist);
180 } while (ir_worklist_len(&worklist));
181 }
182
183 IR_ASSERT(bb_count > 0);
184
185 /* Create array of basic blocks and count successor/predecessors edges for each BB */
186 blocks = ir_mem_malloc((bb_count + 1) * sizeof(ir_block));
187 b = 1;
188 bb = blocks + 1;
189 count = 0;
190 /* SCCP already removed UNREACHABLE blocks, otherwise all blocks are marked as UNREACHABLE first */
191 bb_init_falgs = (ctx->flags2 & IR_CFG_REACHABLE) ? 0 : IR_BB_UNREACHABLE;
192 IR_BITSET_FOREACH(bb_starts, len, start) {
193 insn = &ctx->ir_base[start];
194 if (insn->op == IR_NOP) {
195 _blocks[start] = 0;
196 continue;
197 }
198 end = _blocks[start];
199 _blocks[start] = b;
200 _blocks[end] = b;
201 IR_ASSERT(IR_IS_BB_START(insn->op));
203 bb->start = start;
204 bb->end = end;
205 bb->successors = count;
206 count += ctx->use_lists[end].count;
207 bb->successors_count = 0;
208 bb->predecessors = count;
209 bb->dom_parent = 0;
210 bb->dom_depth = 0;
211 bb->dom_child = 0;
212 bb->dom_next_child = 0;
213 bb->loop_header = 0;
214 bb->loop_depth = 0;
215 if (insn->op == IR_START) {
216 bb->flags = IR_BB_START;
217 bb->predecessors_count = 0;
218 } else {
219 bb->flags = bb_init_falgs;
220 if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
221 n = insn->inputs_count;
222 bb->predecessors_count = n;
223 edges_count += n;
224 count += n;
225 } else if (EXPECTED(insn->op1)) {
226 if (insn->op == IR_ENTRY) {
227 bb->flags |= IR_BB_ENTRY;
228 ctx->entries_count++;
229 }
230 bb->predecessors_count = 1;
231 edges_count++;
232 count++;
233 } else {
234 IR_ASSERT(insn->op == IR_BEGIN); /* start of unreachable block */
235 bb->predecessors_count = 0;
236 }
237 }
238 b++;
239 bb++;
240 } IR_BITSET_FOREACH_END();
241 bb_count = b - 1;
242 IR_ASSERT(count == edges_count * 2);
243 ir_mem_free(bb_starts);
244
245 /* Create an array of successor/predecessors control edges */
246 edges = ir_mem_malloc(edges_count * 2 * sizeof(uint32_t));
247 bb = blocks + 1;
248 for (b = 1; b <= bb_count; b++, bb++) {
249 insn = &ctx->ir_base[bb->start];
250 if (bb->predecessors_count > 1) {
251 uint32_t *q = edges + bb->predecessors;
252 n = insn->inputs_count;
253 for (p = insn->ops + 1; n > 0; p++, q++, n--) {
254 ref = *p;
255 IR_ASSERT(ref);
256 ir_ref pred_b = _blocks[ref];
257 ir_block *pred_bb = &blocks[pred_b];
258 IR_ASSERT(pred_b > 0);
259 *q = pred_b;
260 edges[pred_bb->successors + pred_bb->successors_count++] = b;
261 }
262 } else if (bb->predecessors_count == 1) {
263 ref = insn->op1;
264 IR_ASSERT(ref);
266 ir_ref pred_b = _blocks[ref];
267 ir_block *pred_bb = &blocks[pred_b];
268 edges[bb->predecessors] = pred_b;
269 edges[pred_bb->successors + pred_bb->successors_count++] = b;
270 }
271 }
272
273 ctx->cfg_blocks_count = bb_count;
274 ctx->cfg_edges_count = edges_count * 2;
275 ctx->cfg_blocks = blocks;
276 ctx->cfg_edges = edges;
277 ctx->cfg_map = _blocks;
278
279 if (!(ctx->flags2 & IR_CFG_REACHABLE)) {
280 uint32_t reachable_count = 0;
281
282 /* Mark reachable blocks */
283 ir_worklist_clear(&worklist);
284 ir_worklist_push(&worklist, 1);
285 while (ir_worklist_len(&worklist) != 0) {
286 uint32_t *p;
287
288 reachable_count++;
289 b = ir_worklist_pop(&worklist);
290 bb = &blocks[b];
291 bb->flags &= ~IR_BB_UNREACHABLE;
292 n = bb->successors_count;
293 if (n > 1) {
294 for (p = edges + bb->successors; n > 0; p++, n--) {
295 ir_worklist_push(&worklist, *p);
296 }
297 } else if (n == 1) {
298 ir_worklist_push(&worklist, edges[bb->successors]);
299 }
300 }
301 if (reachable_count != ctx->cfg_blocks_count) {
302 ir_remove_unreachable_blocks(ctx);
303 }
304 }
305
306 ir_worklist_free(&worklist);
307
308 return 1;
309}
310
311static void ir_remove_predecessor(ir_ctx *ctx, ir_block *bb, uint32_t from)
312{
313 uint32_t i, *p, *q, n = 0;
314
315 p = q = &ctx->cfg_edges[bb->predecessors];
316 for (i = 0; i < bb->predecessors_count; i++, p++) {
317 if (*p != from) {
318 if (p != q) {
319 *q = *p;
320 }
321 q++;
322 n++;
323 }
324 }
326 bb->predecessors_count = n;
327}
328
329static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
330{
331 ir_ref i, j, n, k, *p, *q, use;
332 ir_insn *use_insn;
333 ir_use_list *use_list;
334 ir_bitset life_inputs;
335 ir_insn *insn = &ctx->ir_base[merge];
336
337 IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
338 n = insn->inputs_count;
339 i = 1;
340 life_inputs = ir_bitset_malloc(n + 1);
341 for (j = 1; j <= n; j++) {
342 ir_ref input = ir_insn_op(insn, j);
343
344 if (input != from) {
345 if (i != j) {
346 ir_insn_set_op(insn, i, input);
347 }
348 ir_bitset_incl(life_inputs, j);
349 i++;
350 }
351 }
352 i--;
353 for (j = i + 1; j <= n; j++) {
354 ir_insn_set_op(insn, j, IR_UNUSED);
355 }
356 if (i == 1) {
357 insn->op = IR_BEGIN;
358 insn->inputs_count = 1;
359 use_list = &ctx->use_lists[merge];
360 if (use_list->count > 1) {
361 n++;
362 for (k = use_list->count, p = q = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
363 use = *p;
364 use_insn = &ctx->ir_base[use];
365 if (use_insn->op == IR_PHI) {
366 /* Convert PHI to COPY */
367 i = 2;
368 for (j = 2; j <= n; j++) {
369 ir_ref input = ir_insn_op(use_insn, j);
370
371 if (ir_bitset_in(life_inputs, j - 1)) {
372 use_insn->op1 = ir_insn_op(use_insn, j);
373 } else if (input > 0) {
374 ir_use_list_remove_one(ctx, input, use);
375 }
376 }
377 use_insn->op = IR_COPY;
378 use_insn->inputs_count = 1;
379 for (j = 2; j <= n; j++) {
380 ir_insn_set_op(use_insn, j, IR_UNUSED);
381 }
382 continue;
383 }
384
385 /* compact the use list */
386 if (p != q){
387 *q = use;
388 }
389 q++;
390 }
391
392 if (p != q) {
393 use_list->count -= (p - q);
394 do {
395 *q = IR_UNUSED; /* clean up the removed tail */
396 q++;
397 } while (p != q);
398 }
399 }
400 } else {
401 insn->inputs_count = i;
402
403 use_list = &ctx->use_lists[merge];
404 if (use_list->count > 1) {
405 n++;
406 for (k = use_list->count, p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
407 use = *p;
408 use_insn = &ctx->ir_base[use];
409 if (use_insn->op == IR_PHI) {
410 i = 2;
411 for (j = 2; j <= n; j++) {
412 ir_ref input = ir_insn_op(use_insn, j);
413
414 if (ir_bitset_in(life_inputs, j - 1)) {
415 IR_ASSERT(input);
416 if (i != j) {
417 ir_insn_set_op(use_insn, i, input);
418 }
419 i++;
420 } else if (input > 0) {
421 ir_use_list_remove_one(ctx, input, use);
422 }
423 }
424 use_insn->inputs_count = i - 1;
425 for (j = i; j <= n; j++) {
426 ir_insn_set_op(use_insn, j, IR_UNUSED);
427 }
428 }
429 }
430 }
431 }
432 ir_mem_free(life_inputs);
433 ir_use_list_remove_all(ctx, from, merge);
434}
435
436/* A CFG constructed after the SCCP pass has no unreachable BBs; otherwise they must be removed here */
437static int ir_remove_unreachable_blocks(ir_ctx *ctx)
438{
439 uint32_t b, *p, i;
440 uint32_t unreachable_count = 0;
441 uint32_t bb_count = ctx->cfg_blocks_count;
442 ir_block *bb = ctx->cfg_blocks + 1;
443
444 for (b = 1; b <= bb_count; b++, bb++) {
445 if (bb->flags & IR_BB_UNREACHABLE) {
446#if 0
447 do {if (!unreachable_count) ir_dump_cfg(ctx, stderr);} while(0);
448#endif
449 if (bb->successors_count) {
450 for (i = 0, p = &ctx->cfg_edges[bb->successors]; i < bb->successors_count; i++, p++) {
451 ir_block *succ_bb = &ctx->cfg_blocks[*p];
452
453 if (!(succ_bb->flags & IR_BB_UNREACHABLE)) {
454 ir_remove_predecessor(ctx, succ_bb, b);
455 ir_remove_merge_input(ctx, succ_bb->start, bb->end);
456 }
457 }
458 } else {
459 ir_ref prev, ref = bb->end;
460 ir_insn *insn = &ctx->ir_base[ref];
461
463 /* remove from terminators list */
464 prev = ctx->ir_base[1].op1;
465 if (prev == ref) {
466 ctx->ir_base[1].op1 = insn->op3;
467 } else {
468 while (prev) {
469 if (ctx->ir_base[prev].op3 == ref) {
470 ctx->ir_base[prev].op3 = insn->op3;
471 break;
472 }
473 prev = ctx->ir_base[prev].op3;
474 }
475 }
476 }
477 ctx->cfg_map[bb->start] = 0;
478 ctx->cfg_map[bb->end] = 0;
479 unreachable_count++;
480 }
481 }
482
483 if (unreachable_count) {
484 ir_block *dst_bb;
485 uint32_t n = 1;
486 uint32_t *edges;
487
488 dst_bb = bb = ctx->cfg_blocks + 1;
489 for (b = 1; b <= bb_count; b++, bb++) {
490 if (!(bb->flags & IR_BB_UNREACHABLE)) {
491 if (dst_bb != bb) {
492 memcpy(dst_bb, bb, sizeof(ir_block));
493 ctx->cfg_map[dst_bb->start] = n;
494 ctx->cfg_map[dst_bb->end] = n;
495 }
496 dst_bb->successors_count = 0;
497 dst_bb++;
498 n++;
499 }
500 }
501 ctx->cfg_blocks_count = bb_count = n - 1;
502
503 /* Rebuild successor/predecessors control edges */
504 edges = ctx->cfg_edges;
505 bb = ctx->cfg_blocks + 1;
506 for (b = 1; b <= bb_count; b++, bb++) {
507 ir_insn *insn = &ctx->ir_base[bb->start];
508 ir_ref *p, ref;
509
510 n = bb->predecessors_count;
511 if (n > 1) {
512 uint32_t *q = edges + bb->predecessors;
513
514 IR_ASSERT(n == insn->inputs_count);
515 for (p = insn->ops + 1; n > 0; p++, q++, n--) {
516 ref = *p;
517 IR_ASSERT(ref);
518 ir_ref pred_b = ctx->cfg_map[ref];
519 ir_block *pred_bb = &ctx->cfg_blocks[pred_b];
520 *q = pred_b;
521 edges[pred_bb->successors + pred_bb->successors_count++] = b;
522 }
523 } else if (n == 1) {
524 ref = insn->op1;
525 IR_ASSERT(ref);
527 ir_ref pred_b = ctx->cfg_map[ref];
528 ir_block *pred_bb = &ctx->cfg_blocks[pred_b];
529 edges[bb->predecessors] = pred_b;
530 edges[pred_bb->successors + pred_bb->successors_count++] = b;
531 }
532 }
533 }
534
535 return 1;
536}
537
538#if 0
539static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
540{
541 uint32_t i, *p;
542 ir_block *bb = &ctx->cfg_blocks[b];
543
544 if (bb->postnum != 0) {
545 return;
546 }
547
548 if (bb->successors_count) {
549 bb->postnum = -1; /* Marker for "currently visiting" */
550 p = ctx->cfg_edges + bb->successors;
551 i = bb->successors_count;
552 do {
553 compute_postnum(ctx, cur, *p);
554 p++;
555 } while (--i);
556 }
557 bb->postnum = (*cur)++;
558}
559
560/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
561 * Cooper, Harvey and Kennedy. */
562int ir_build_dominators_tree(ir_ctx *ctx)
563{
564 uint32_t blocks_count, b, postnum;
565 ir_block *blocks, *bb;
566 uint32_t *edges;
567 bool changed;
568
569 ctx->flags2 &= ~IR_NO_LOOPS;
570
571 postnum = 1;
572 compute_postnum(ctx, &postnum, 1);
573
574 /* Find immediate dominators */
575 blocks = ctx->cfg_blocks;
576 edges = ctx->cfg_edges;
577 blocks_count = ctx->cfg_blocks_count;
578 blocks[1].idom = 1;
579 do {
580 changed = 0;
581 /* Iterating in Reverse Post Order */
582 for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
584 if (bb->predecessors_count == 1) {
585 uint32_t pred_b = edges[bb->predecessors];
586
587 if (blocks[pred_b].idom <= 0) {
588 //IR_ASSERT("Wrong blocks order: BB is before its single predecessor");
589 } else if (bb->idom != pred_b) {
590 bb->idom = pred_b;
591 changed = 1;
592 }
593 } else if (bb->predecessors_count) {
594 uint32_t idom = 0;
595 uint32_t k = bb->predecessors_count;
596 uint32_t *p = edges + bb->predecessors;
597
598 do {
599 uint32_t pred_b = *p;
600 ir_block *pred_bb = &blocks[pred_b];
601 ir_block *idom_bb;
602
603 if (pred_bb->idom > 0) {
604 idom = pred_b;
605 idom_bb = &blocks[idom];
606
607 while (--k > 0) {
608 pred_b = *(++p);
609 pred_bb = &blocks[pred_b];
610 if (pred_bb->idom > 0) {
611 while (idom != pred_b) {
612 while (pred_bb->postnum < idom_bb->postnum) {
613 pred_b = pred_bb->idom;
614 pred_bb = &blocks[pred_b];
615 }
616 while (idom_bb->postnum < pred_bb->postnum) {
617 idom = idom_bb->idom;
618 idom_bb = &blocks[idom];
619 }
620 }
621 }
622 }
623
624 if (bb->idom != idom) {
625 bb->idom = idom;
626 changed = 1;
627 }
628 break;
629 }
630 p++;
631 } while (--k > 0);
632 }
633 }
634 } while (changed);
635 blocks[1].idom = 0;
636 blocks[1].dom_depth = 0;
637
638 /* Construct dominators tree */
639 for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
641 if (bb->idom > 0) {
642 ir_block *idom_bb = &blocks[bb->idom];
643
644 bb->dom_depth = idom_bb->dom_depth + 1;
645 /* Sort by block number to traverse children in pre-order */
646 if (idom_bb->dom_child == 0) {
647 idom_bb->dom_child = b;
648 } else if (b < idom_bb->dom_child) {
649 bb->dom_next_child = idom_bb->dom_child;
650 idom_bb->dom_child = b;
651 } else {
652 int child = idom_bb->dom_child;
653 ir_block *child_bb = &blocks[child];
654
655 while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
656 child = child_bb->dom_next_child;
657 child_bb = &blocks[child];
658 }
659 bb->dom_next_child = child_bb->dom_next_child;
660 child_bb->dom_next_child = b;
661 }
662 }
663 }
664
665 return 1;
666}
667#else
668/* A single-pass modification of "A Simple, Fast Dominance Algorithm" by
669 * Cooper, Harvey and Kennedy that relies on the IR block ordering.
670 * It may fall back to the general slow fixed-point algorithm. */
671static int ir_build_dominators_tree_iterative(ir_ctx *ctx);
672int ir_build_dominators_tree(ir_ctx *ctx)
673{
674 uint32_t blocks_count, b;
675 ir_block *blocks, *bb;
676 uint32_t *edges;
677 ir_list worklist;
678
679 ir_list_init(&worklist, ctx->cfg_blocks_count / 2);
680
681 ctx->flags2 |= IR_NO_LOOPS;
682
683 /* Find immediate dominators */
684 blocks = ctx->cfg_blocks;
685 edges = ctx->cfg_edges;
686 blocks_count = ctx->cfg_blocks_count;
687 blocks[1].idom = 1;
688 blocks[1].dom_depth = 0;
689
690 /* Iterating in Reverse Post Order */
691 for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
694 uint32_t k = bb->predecessors_count;
695 uint32_t *p = edges + bb->predecessors;
696 uint32_t idom = *p;
697 ir_block *idom_bb;
698
699 if (UNEXPECTED(idom >= b)) {
700 /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
701 ctx->flags2 &= ~IR_NO_LOOPS;
702 IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
703 ir_list_push(&worklist, idom);
704 while (1) {
705 k--;
706 p++;
707 idom = *p;
708 if (idom < b) {
709 break;
710 }
711 IR_ASSERT(k > 0);
712 ir_list_push(&worklist, idom);
713 }
714 }
715 IR_ASSERT(blocks[idom].idom > 0);
716 IR_ASSERT(k != 0);
717
718 while (--k > 0) {
719 uint32_t pred_b = *(++p);
720
721 if (pred_b < b) {
722 IR_ASSERT(blocks[pred_b].idom > 0);
723 while (idom != pred_b) {
724 while (pred_b > idom) {
725 pred_b = blocks[pred_b].idom;
726 }
727 while (idom > pred_b) {
728 idom = blocks[idom].idom;
729 }
730 }
731 } else {
732 ctx->flags2 &= ~IR_NO_LOOPS;
734 ir_list_push(&worklist, pred_b);
735 }
736 }
737 bb->idom = idom;
738 idom_bb = &blocks[idom];
739
740 bb->dom_depth = idom_bb->dom_depth + 1;
741 /* Sort by block number to traverse children in pre-order */
742 if (idom_bb->dom_child == 0) {
743 idom_bb->dom_child = b;
744 } else if (b < idom_bb->dom_child) {
745 bb->dom_next_child = idom_bb->dom_child;
746 idom_bb->dom_child = b;
747 } else {
748 int child = idom_bb->dom_child;
749 ir_block *child_bb = &blocks[child];
750
751 while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
752 child = child_bb->dom_next_child;
753 child_bb = &blocks[child];
754 }
755 bb->dom_next_child = child_bb->dom_next_child;
756 child_bb->dom_next_child = b;
757 }
758 }
759
760 blocks[1].idom = 0;
761
762 if (ir_list_len(&worklist) != 0) {
763 uint32_t dom_depth;
764 uint32_t succ_b;
765 bool complete = 1;
766
767 /* Check if all the back-edges lead to the loop headers */
768 do {
769 b = ir_list_pop(&worklist);
770 bb = &blocks[b];
771 succ_b = ctx->cfg_edges[bb->successors];
772 if (bb->successors_count != 1) {
773 /* LOOP_END/END may be linked with the following ENTRY by a fake edge */
774 IR_ASSERT(bb->successors_count == 2);
775 if (blocks[succ_b].flags & IR_BB_ENTRY) {
776 succ_b = ctx->cfg_edges[bb->successors + 1];
777 } else {
778 IR_ASSERT(blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
779 }
780 }
781 dom_depth = blocks[succ_b].dom_depth;
782 while (bb->dom_depth > dom_depth) {
783 b = bb->dom_parent;
784 bb = &blocks[b];
785 }
786 if (UNEXPECTED(b != succ_b)) {
787 complete = 0;
788 break;
789 }
790 } while (ir_list_len(&worklist) != 0);
791
792 if (UNEXPECTED(!complete)) {
793 ir_list_free(&worklist);
794 return ir_build_dominators_tree_iterative(ctx);
795 }
796 }
797
798 ir_list_free(&worklist);
799
800 return 1;
801}
802
803static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
804{
805 bool changed;
806 uint32_t blocks_count, b;
807 ir_block *blocks, *bb;
808 uint32_t *edges;
809
810 /* Find immediate dominators */
811 blocks = ctx->cfg_blocks;
812 edges = ctx->cfg_edges;
813 blocks_count = ctx->cfg_blocks_count;
814
815 /* Clear the dominators tree, but keep already found dominators */
816 for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
817 bb->dom_depth = 0;
818 bb->dom_child = 0;
819 bb->dom_next_child = 0;
820 }
821
822 /* Find immediate dominators by iterative fixed-point algorithm */
823 blocks[1].idom = 1;
824 do {
825 changed = 0;
826
827 for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
830 uint32_t k = bb->predecessors_count;
831 uint32_t *p = edges + bb->predecessors;
832 uint32_t idom = *p;
833
834 if (blocks[idom].idom == 0) {
835 while (1) {
836 k--;
837 p++;
838 idom = *p;
839 if (blocks[idom].idom > 0) {
840 break;
841 }
842 IR_ASSERT(k > 0);
843 }
844 }
845 IR_ASSERT(k != 0);
846 while (--k > 0) {
847 uint32_t pred_b = *(++p);
848
849 if (blocks[pred_b].idom > 0) {
850 IR_ASSERT(blocks[pred_b].idom > 0);
851 while (idom != pred_b) {
852 while (pred_b > idom) {
853 pred_b = blocks[pred_b].idom;
854 }
855 while (idom > pred_b) {
856 idom = blocks[idom].idom;
857 }
858 }
859 }
860 }
861 if (bb->idom != idom) {
862 bb->idom = idom;
863 changed = 1;
864 }
865 }
866 } while (changed);
867
868 /* Build dominators tree */
869 blocks[1].idom = 0;
870 blocks[1].dom_depth = 0;
871 for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
872 uint32_t idom = bb->idom;
873 ir_block *idom_bb = &blocks[idom];
874
875 bb->dom_depth = idom_bb->dom_depth + 1;
876 /* Sort by block number to traverse children in pre-order */
877 if (idom_bb->dom_child == 0) {
878 idom_bb->dom_child = b;
879 } else if (b < idom_bb->dom_child) {
880 bb->dom_next_child = idom_bb->dom_child;
881 idom_bb->dom_child = b;
882 } else {
883 int child = idom_bb->dom_child;
884 ir_block *child_bb = &blocks[child];
885
886 while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
887 child = child_bb->dom_next_child;
888 child_bb = &blocks[child];
889 }
890 bb->dom_next_child = child_bb->dom_next_child;
891 child_bb->dom_next_child = b;
892 }
893 }
894
895 return 1;
896}
897#endif
898
899static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
900{
901 uint32_t b1_depth = blocks[b1].dom_depth;
902 const ir_block *bb2 = &blocks[b2];
903
904 while (bb2->dom_depth > b1_depth) {
905 b2 = bb2->dom_parent;
906 bb2 = &blocks[b2];
907 }
908 return b1 == b2;
909}
910
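/*
 * ir_find_loops() follows Sreedhar et al., "Identifying Loops Using DJ Graphs":
 * a DFS over dominator-tree ("D") edges and join ("J") edges records entry/exit
 * times for ancestor queries, blocks are then processed from the deepest
 * dominator-tree level upwards, back-join edges (where the successor dominates the
 * predecessor) mark loop headers, and cross-join edges to a spanning-tree ancestor
 * mark the CFG as irreducible.
 */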
911int ir_find_loops(ir_ctx *ctx)
912{
913 uint32_t i, j, n, count;
914 uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1;
915 ir_block *blocks = ctx->cfg_blocks;
916 uint32_t *edges = ctx->cfg_edges;
917 ir_worklist work;
918
919 if (ctx->flags2 & IR_NO_LOOPS) {
920 return 1;
921 }
922
923 /* We don't materialize the DJ spanning tree explicitly, as we are only interested in ancestor
924 * queries. These are implemented by checking entry/exit times of the DFS search. */
925 ir_worklist_init(&work, ctx->cfg_blocks_count + 1);
926 entry_times = ir_mem_malloc((ctx->cfg_blocks_count + 1) * 3 * sizeof(uint32_t));
927 exit_times = entry_times + ctx->cfg_blocks_count + 1;
928 sorted_blocks = exit_times + ctx->cfg_blocks_count + 1;
929
930 memset(entry_times, 0, (ctx->cfg_blocks_count + 1) * sizeof(uint32_t));
931
932 ir_worklist_push(&work, 1);
933 while (ir_worklist_len(&work)) {
934 ir_block *bb;
935 int child;
936
937next:
938 i = ir_worklist_peek(&work);
939 if (!entry_times[i]) {
940 entry_times[i] = time++;
941 }
942
943 /* Visit blocks immediately dominated by i. */
944 bb = &blocks[i];
945 for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) {
946 if (ir_worklist_push(&work, child)) {
947 goto next;
948 }
949 }
950
951 /* Visit join edges. */
952 if (bb->successors_count) {
953 uint32_t *p = edges + bb->successors;
954 for (j = 0; j < bb->successors_count; j++,p++) {
955 uint32_t succ = *p;
956
957 if (blocks[succ].idom == i) {
958 continue;
959 } else if (ir_worklist_push(&work, succ)) {
960 goto next;
961 }
962 }
963 }
964 exit_times[i] = time++;
965 ir_worklist_pop(&work);
966 }
967
968 /* Sort blocks by level, which is the opposite order in which we want to process them */
969 sorted_blocks[1] = 1;
970 j = 1;
971 n = 2;
972 while (j != n) {
973 i = j;
974 j = n;
975 for (; i < j; i++) {
976 int child;
977 for (child = blocks[sorted_blocks[i]].dom_child; child > 0; child = blocks[child].dom_next_child) {
978 sorted_blocks[n++] = child;
979 }
980 }
981 }
982 count = n;
983
984 /* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */
985 while (n > 1) {
986 i = sorted_blocks[--n];
987 ir_block *bb = &blocks[i];
988
989 if (bb->predecessors_count > 1) {
990 bool irreducible = 0;
991 uint32_t *p = &edges[bb->predecessors];
992
993 j = bb->predecessors_count;
994 do {
995 uint32_t pred = *p;
996
997 /* A join edge is one for which the predecessor does not
998 immediately dominate the successor. */
999 if (bb->idom != pred) {
1000 /* In a loop back-edge (back-join edge), the successor dominates
1001 the predecessor. */
1002 if (ir_dominates(blocks, i, pred)) {
1003 if (!ir_worklist_len(&work)) {
1004 ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
1005 }
1006 blocks[pred].loop_header = 0; /* support for merged loops */
1007 ir_worklist_push(&work, pred);
1008 } else {
1009 /* Otherwise it's a cross-join edge. See if it's a branch
1010 to an ancestor on the DJ spanning tree. */
1011 if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) {
1012 irreducible = 1;
1013 }
1014 }
1015 }
1016 p++;
1017 } while (--j);
1018
1019 if (UNEXPECTED(irreducible)) {
1020 // TODO: Support for irreducible loops ???
1021 bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
1022 ctx->flags2 |= IR_IRREDUCIBLE_CFG;
1023 while (ir_worklist_len(&work)) {
1024 ir_worklist_pop(&work);
1025 }
1026 } else if (ir_worklist_len(&work)) {
1027 bb->flags |= IR_BB_LOOP_HEADER;
1028 ctx->flags2 |= IR_CFG_HAS_LOOPS;
1029 bb->loop_depth = 1;
1030 while (ir_worklist_len(&work)) {
1031 j = ir_worklist_pop(&work);
1032 while (blocks[j].loop_header > 0) {
1033 j = blocks[j].loop_header;
1034 }
1035 if (j != i) {
1036 ir_block *bb = &blocks[j];
1037 if (bb->idom == 0 && j != 1) {
1038 /* Ignore blocks that are unreachable or only abnormally reachable. */
1039 continue;
1040 }
1041 bb->loop_header = i;
1042 if (bb->predecessors_count) {
1043 uint32_t *p = &edges[bb->predecessors];
1044 j = bb->predecessors_count;
1045 do {
1046 ir_worklist_push(&work, *p);
1047 p++;
1048 } while (--j);
1049 }
1050 }
1051 }
1052 }
1053 }
1054 }
1055
1056 if (ctx->flags2 & IR_CFG_HAS_LOOPS) {
1057 for (n = 1; n < count; n++) {
1058 i = sorted_blocks[n];
1059 ir_block *bb = &blocks[i];
1060 if (bb->loop_header > 0) {
1061 ir_block *loop = &blocks[bb->loop_header];
1062 uint32_t loop_depth = loop->loop_depth;
1063
1064 if (bb->flags & IR_BB_LOOP_HEADER) {
1065 loop_depth++;
1066 }
1067 bb->loop_depth = loop_depth;
1070 if (loop_depth > 1) {
1071 /* Set IR_BB_LOOP_WITH_ENTRY flag for all the enclosing loops */
1072 bb = &blocks[loop->loop_header];
1073 while (1) {
1074 if (bb->flags & IR_BB_LOOP_WITH_ENTRY) {
1075 break;
1076 }
1077 bb->flags |= IR_BB_LOOP_WITH_ENTRY;
1078 if (bb->loop_depth == 1) {
1079 break;
1080 }
1081 bb = &blocks[loop->loop_header];
1082 }
1083 }
1084 }
1085 }
1086 }
1087 }
1088
1089 ir_mem_free(entry_times);
1090 ir_worklist_free(&work);
1091
1092 return 1;
1093}
1094
1095static uint32_t _ir_skip_empty_blocks(const ir_ctx *ctx, uint32_t b)
1096{
1097 while (1) {
1098 ir_block *bb = &ctx->cfg_blocks[b];
1099
1100 if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
1101 IR_ASSERT(bb->successors_count == 1);
1102 b = ctx->cfg_edges[bb->successors];
1103 } else {
1104 return b;
1105 }
1106 }
1107}
1108
1109/* A variation of "Bottom-up Positioning" algorithm described by
1110 * Karl Pettis and Robert C. Hansen "Profile Guided Code Positioning"
1111 */
1112typedef struct _ir_edge_info {
1113 uint32_t from;
1114 uint32_t to;
1115 float freq;
1116} ir_edge_info;
1117
1118typedef struct _ir_chain {
1119 uint32_t head;
1120 uint32_t next;
1121 union {
1122 uint32_t prev;
1123 uint32_t tail;
1124 };
1125} ir_chain;
1126
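/*
 * A chain is kept as a circular list of blocks linked through "next"/"prev"; for the
 * first block of a chain "prev" doubles as "tail" (hence the union), and "head"
 * points, possibly transitively with path compression in ir_chain_head(), to the
 * current first block of the chain a block belongs to.
 */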
1127static int ir_edge_info_cmp(const void *b1, const void *b2)
1128{
1129 ir_edge_info *e1 = (ir_edge_info*)b1;
1130 ir_edge_info *e2 = (ir_edge_info*)b2;
1131
1132 if (e1->freq != e2->freq) {
1133 return e1->freq < e2->freq ? 1 : -1;
1134 }
1135 /* In case of equal frequencies, try to avoid penalization of one of the "equal" paths by
1136 * preferring the first RPO successor (in conditional branches) and the last RPO predecessor
1137 * (in merge points).
1138 *
1139 * See "Static Basic Block Reordering Heuristics for Implicit Control Flow in Baseline JITs"
1140 * Polito Guillermo, Ducasse Stephane, and Tesone Pablo (2021)
1141 */
1142 if (e1->from != e2->from) {
1143 return e2->from - e1->from;
1144 } else {
1145 return e1->to - e2->to;
1146 }
1147}
1148
1149static IR_NEVER_INLINE uint32_t ir_chain_head_path_compress(ir_chain *chains, uint32_t src, uint32_t head)
1150{
1151 IR_ASSERT(head != 0);
1152 do {
1153 head = chains[head].head;
1154 } while (chains[head].head != head);
1155 do {
1156 uint32_t next = chains[src].head;
1157 chains[src].head = head;
1158 src = next;
1159 } while (chains[src].head != head);
1160 return head;
1161}
1162
1163IR_ALWAYS_INLINE uint32_t ir_chain_head(ir_chain *chains, uint32_t src)
1164{
1165 uint32_t head = chains[src].head;
1166 if (chains[head].head == head) {
1167 return head;
1168 } else {
1169 return ir_chain_head_path_compress(chains, src, head);
1170 }
1171}
1172
1173static void ir_join_chains(ir_chain *chains, uint32_t src, uint32_t dst)
1174{
1175 uint32_t dst_tail = chains[dst].tail;
1176 uint32_t src_tail = chains[src].tail;
1177
1178 chains[dst_tail].next = src;
1179 chains[dst].prev = src_tail;
1180 chains[src_tail].next = dst;
1181 chains[src].tail = dst_tail;
1182 chains[dst].head = src;
1183}
1184
1185static void ir_insert_chain_before(ir_chain *chains, uint32_t c, uint32_t before)
1186{
1187 ir_chain *this = &chains[c];
1188 ir_chain *next = &chains[before];
1189
1190 IR_ASSERT(chains[c].head == c);
1191 if (chains[before].head != before) {
1192 this->head = next->head;
1193 } else {
1194 next->head = c;
1195 }
1196 this->next = before;
1197 this->prev = next->prev;
1198 next->prev = c;
1199 chains[this->prev].next = c;
1200}
1201
1202#ifndef IR_DEBUG_BB_SCHEDULE_GRAPH
1203# ifdef IR_DEBUG
1204# define IR_DEBUG_BB_SCHEDULE_GRAPH 1
1205# else
1206# define IR_DEBUG_BB_SCHEDULE_GRAPH 0
1207# endif
1208#endif
1209
1210#if IR_DEBUG_BB_SCHEDULE_GRAPH
1211static void ir_dump_cfg_freq_graph(ir_ctx *ctx, float *bb_freq, uint32_t edges_count, ir_edge_info *edges, ir_chain *chains)
1212{
1213 uint32_t b, i;
1214 ir_block *bb;
1215 uint8_t c, *colors;
1216 bool is_head, is_empty;
1217 uint32_t max_colors = 12;
1218
1219 colors = alloca(sizeof(uint8_t) * (ctx->cfg_blocks_count + 1));
1220 memset(colors, 0, sizeof(uint8_t) * (ctx->cfg_blocks_count + 1));
1221 i = 0;
1222 for (b = 1; b < ctx->cfg_blocks_count + 1; b++) {
1223 if (chains[b].head == b) {
1224 colors[b] = (i % max_colors) + 1;
1225 i++;
1226 }
1227 }
1228
1229 fprintf(stderr, "digraph {\n");
1230 fprintf(stderr, "\trankdir=TB;\n");
1231 for (b = 1; b <= ctx->cfg_blocks_count; b++) {
1232 bb = &ctx->cfg_blocks[b];
1233 c = (chains[b].head) ? colors[ir_chain_head(chains, b)] : 0;
1234 is_head = chains[b].head == b;
1235 is_empty = (bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY;
1236 if (c) {
1237 fprintf(stderr, "\tBB%d [label=\"BB%d: (%d),%0.3f\\n%s\\n%s\",colorscheme=set312,fillcolor=%d%s%s]\n", b, b,
1238 bb->loop_depth, bb_freq[b],
1239 ir_op_name[ctx->ir_base[bb->start].op], ir_op_name[ctx->ir_base[bb->end].op],
1240 c,
1241 is_head ? ",penwidth=3" : "",
1242 is_empty ? ",style=\"dotted,filled\"" : ",style=\"filled\"");
1243 } else {
1244 fprintf(stderr, "\tBB%d [label=\"BB%d: (%d),%0.3f\\n%s\\n%s\"%s%s]\n", b, b,
1245 bb->loop_depth, bb_freq[b],
1246 ir_op_name[ctx->ir_base[bb->start].op], ir_op_name[ctx->ir_base[bb->end].op],
1247 is_head ? ",penwidth=3" : "",
1248 is_empty ? ",style=\"dotted\"" : "");
1249 }
1250 }
1251 fprintf(stderr, "\n");
1252
1253 for (i = 0; i < edges_count; i++) {
1254 fprintf(stderr, "\tBB%d -> BB%d [label=\"%0.3f\"]\n", edges[i].from, edges[i].to, edges[i].freq);
1255 }
1256 fprintf(stderr, "}\n");
1257}
1258#endif
1259
1260#ifdef IR_DEBUG
1261static void ir_dump_edges(ir_ctx *ctx, uint32_t edges_count, ir_edge_info *edges)
1262{
1263 uint32_t i;
1264
1265 fprintf(stderr, "Edges:\n");
1266 for (i = 0; i < edges_count; i++) {
1267 fprintf(stderr, "\tBB%d -> BB%d %0.3f\n", edges[i].from, edges[i].to, edges[i].freq);
1268 }
1269}
1270
1271static void ir_dump_chains(ir_ctx *ctx, ir_chain *chains)
1272{
1273 uint32_t b, tail, i;
1274
1275 fprintf(stderr, "Chains:\n");
1276 for (b = 1; b < ctx->cfg_blocks_count + 1; b++) {
1277 if (chains[b].head == b) {
1278 tail = chains[b].tail;
1279 i = b;
1280 fprintf(stderr, "(BB%d", i);
1281 while (i != tail) {
1282 i = chains[i].next;
1283 fprintf(stderr, ", BB%d", i);
1284 }
1285 fprintf(stderr, ")\n");
1286 }
1287 }
1288}
1289#endif
1290
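/*
 * ir_schedule_blocks_bottom_up() follows the numbered steps below: build a
 * single-block chain per BB, estimate block and edge frequencies from branch
 * probability hints and loop depth, sort the edges by frequency, join chains
 * along the hottest edges, merge empty ENTRY blocks, align loop headers, group
 * the remaining chains, and emit the final block order into ctx->cfg_schedule.
 */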
1291static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
1292{
1293 uint32_t max_edges_count = ctx->cfg_edges_count / 2;
1294 uint32_t edges_count = 0;
1295 uint32_t b, i, loop_depth;
1296 float *bb_freq, freq;
1297 ir_block *bb;
1298 ir_edge_info *edges, *e;
1299 ir_chain *chains;
1300 ir_bitqueue worklist;
1301 ir_bitset visited;
1302 uint32_t *schedule_end, count;
1303
1304 ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
1305 schedule_end = ctx->cfg_schedule + ctx->cfg_blocks_count;
1306
1307 /* 1. Create initial chains for each BB */
1308 chains = ir_mem_malloc(sizeof(ir_chain) * (ctx->cfg_blocks_count + 1));
1309 chains[0].head = 0;
1310 chains[0].next = 0;
1311 chains[0].prev = 0;
1312 for (b = 1; b <= ctx->cfg_blocks_count; b++) {
1313 chains[b].head = b;
1314 chains[b].next = b;
1315 chains[b].prev = b;
1316 }
1317
1318 /* 2. Collect information about BBs and EDGEs frequencies */
1319 edges = ir_mem_malloc(sizeof(ir_edge_info) * max_edges_count);
1320 bb_freq = ir_mem_calloc(ctx->cfg_blocks_count + 1, sizeof(float));
1321 bb_freq[1] = 1.0f;
1322 visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
1323 ir_bitqueue_init(&worklist, ctx->cfg_blocks_count + 1);
1324 ir_bitqueue_add(&worklist, 1);
1325 while ((b = ir_bitqueue_pop(&worklist)) != (uint32_t)-1) {
1326restart:
1327 bb = &ctx->cfg_blocks[b];
1328 if (bb->predecessors_count) {
1329 uint32_t n = bb->predecessors_count;
1330 uint32_t *p = ctx->cfg_edges + bb->predecessors;
1331 for (; n > 0; p++, n--) {
1332 uint32_t predecessor = *p;
1333 /* Basic blocks are ordered so that the ids of ordinary predecessors are less than the ids of their successors.
1334 * So we may compare block ids (predecessor < b) instead of the more expensive back-edge check
1335 * (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b)
1336 */
1337 if (predecessor < b) {
1338 if (!ir_bitset_in(visited, predecessor)) {
1339 b = predecessor;
1340 ir_bitqueue_del(&worklist, b);
1341 goto restart;
1342 }
1343 } else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) {
1344 ir_dump_cfg(ctx, stderr);
1345 IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b);
1346 }
1347 }
1348 }
1349
1350 ir_bitset_incl(visited, b);
1351
1352 if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
1353 uint32_t successor = ctx->cfg_edges[bb->successors];
1354
1355 /* move empty blocks to the end */
1356 IR_ASSERT(chains[b].head == b);
1357 chains[b].head = 0;
1358 *schedule_end = b;
1359 schedule_end--;
1360
1361 if (successor > b) {
1362 bb_freq[successor] += bb_freq[b];
1363 b = successor;
1364 ir_bitqueue_del(&worklist, b);
1365 goto restart;
1366 } else {
1367 continue;
1368 }
1369 }
1370
1371 loop_depth = bb->loop_depth;
1372 if (bb->flags & IR_BB_LOOP_HEADER) {
1373 // TODO: Estimate the loop iterations count
1374 bb_freq[b] *= 10;
1375 }
1376
1377 if (bb->successors_count) {
1378 uint32_t n = bb->successors_count;
1379 uint32_t *p = ctx->cfg_edges + bb->successors;
1380
1381 if (n == 1) {
1382 uint32_t successor = *p;
1383
1384 IR_ASSERT(edges_count < max_edges_count);
1385 freq = bb_freq[b];
1386 if (successor > b) {
1387 IR_ASSERT(!ir_bitset_in(visited, successor));
1388 bb_freq[successor] += freq;
1389 ir_bitqueue_add(&worklist, successor);
1390 }
1391 successor = _ir_skip_empty_blocks(ctx, successor);
1392 edges[edges_count].from = b;
1393 edges[edges_count].to = successor;
1394 edges[edges_count].freq = freq;
1395 edges_count++;
1396 } else if (n == 2 && ctx->ir_base[bb->end].op == IR_IF) {
1397 uint32_t successor1 = *p;
1398 ir_block *successor1_bb = &ctx->cfg_blocks[successor1];
1399 ir_insn *insn1 = &ctx->ir_base[successor1_bb->start];
1400 uint32_t successor2 = *(p + 1);
1401 ir_block *successor2_bb = &ctx->cfg_blocks[successor2];
1402 ir_insn *insn2 = &ctx->ir_base[successor2_bb->start];
1403 int prob1, prob2, probN = 100;
1404
1405 if (insn1->op2) {
1406 prob1 = insn1->op2;
1407 if (insn2->op2) {
1408 prob2 = insn2->op2;
1409 probN = prob1 + prob2;
1410 } else {
1411 if (prob1 > 100) {
1412 prob1 = 100;
1413 }
1414 prob2 = 100 - prob1;
1415 }
1416
1417 } else if (insn2->op2) {
1418 prob2 = insn2->op2;
1419 if (prob2 > 100) {
1420 prob2 = 100;
1421 }
1422 prob1 = 100 - prob2;
1423 } else if (successor1_bb->loop_depth >= loop_depth
1424 && successor2_bb->loop_depth < loop_depth) {
1425 prob1 = 90;
1426 prob2 = 10;
1427 } else if (successor1_bb->loop_depth < loop_depth
1428 && successor2_bb->loop_depth >= loop_depth) {
1429 prob1 = 10;
1430 prob2 = 90;
1431 } else if (successor2_bb->flags & IR_BB_EMPTY) {
1432 prob1 = 51;
1433 prob2 = 49;
1434 } else if (successor1_bb->flags & IR_BB_EMPTY) {
1435 prob1 = 49;
1436 prob2 = 51;
1437 } else {
1438 prob1 = prob2 = 50;
1439 }
1440 do {
1441 freq = bb_freq[b] * (float)prob1 / (float)probN;
1442 if (successor1 > b) {
1443 IR_ASSERT(!ir_bitset_in(visited, successor1));
1444 bb_freq[successor1] += freq;
1445 if (successor1_bb->successors_count == 0 && insn1->op2 == 1) {
1446 /* move cold block without successors to the end */
1447 IR_ASSERT(chains[successor1].head == successor1);
1448 chains[successor1].head = 0;
1449 *schedule_end = successor1;
1450 schedule_end--;
1451 break;
1452 } else {
1453 ir_bitqueue_add(&worklist, successor1);
1454 }
1455 }
1456 /* try to join edges early to reduce number of edges and the cost of their sorting */
1457 if (prob1 > prob2
1458 && (successor1_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) {
1459 uint32_t src = chains[b].next;
1460 IR_ASSERT(chains[src].head == src);
1461 IR_ASSERT(src == ir_chain_head(chains, b));
1462 IR_ASSERT(chains[successor1].head == successor1);
1463 ir_join_chains(chains, src, successor1);
1464 if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break;
1465 }
1466 successor1 = _ir_skip_empty_blocks(ctx, successor1);
1467 IR_ASSERT(edges_count < max_edges_count);
1468 edges[edges_count].from = b;
1469 edges[edges_count].to = successor1;
1470 edges[edges_count].freq = freq;
1471 edges_count++;
1472 } while (0);
1473 do {
1474 freq = bb_freq[b] * (float)prob2 / (float)probN;
1475 if (successor2 > b) {
1476 IR_ASSERT(!ir_bitset_in(visited, successor2));
1477 bb_freq[successor2] += freq;
1478 if (successor2_bb->successors_count == 0 && insn2->op2 == 1) {
1479 /* move cold block without successors to the end */
1480 IR_ASSERT(chains[successor2].head == successor2);
1481 chains[successor2].head = 0;
1482 *schedule_end = successor2;
1483 schedule_end--;
1484 break;
1485 } else {
1486 ir_bitqueue_add(&worklist, successor2);
1487 }
1488 }
1489 if (prob2 > prob1
1490 && (successor2_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) {
1491 uint32_t src = chains[b].next;
1492 IR_ASSERT(chains[src].head == src);
1493 IR_ASSERT(src == ir_chain_head(chains, b));
1494 IR_ASSERT(chains[successor2].head == successor2);
1495 ir_join_chains(chains, src, successor2);
1496 if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break;
1497 }
1498 successor2 = _ir_skip_empty_blocks(ctx, successor2);
1499 IR_ASSERT(edges_count < max_edges_count);
1500 edges[edges_count].from = b;
1501 edges[edges_count].to = successor2;
1502 edges[edges_count].freq = freq;
1503 edges_count++;
1504 } while (0);
1505 } else {
1506 int prob;
1507
1508 for (; n > 0; p++, n--) {
1509 uint32_t successor = *p;
1510 ir_block *successor_bb = &ctx->cfg_blocks[successor];
1511 ir_insn *insn = &ctx->ir_base[successor_bb->start];
1512
1513 if (insn->op == IR_CASE_DEFAULT) {
1514 prob = insn->op2;
1515 if (!prob) {
1516 prob = 100 / bb->successors_count;
1517 }
1518 } else if (insn->op == IR_CASE_VAL) {
1519 prob = insn->op3;
1520 if (!prob) {
1521 prob = 100 / bb->successors_count;
1522 }
1523 } else if (insn->op == IR_ENTRY) {
1524 if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (successor_bb->flags & IR_BB_EMPTY)) {
1525 prob = 99; /* prefer empty ENTRY block to go first */
1526 } else {
1527 prob = 1;
1528 }
1529 } else {
1530 prob = 100 / bb->successors_count;
1531 }
1532 freq = bb_freq[b] * (float)prob / 100.0f;
1533 if (successor > b) {
1534 IR_ASSERT(!ir_bitset_in(visited, successor));
1535 bb_freq[successor] += freq;
1536 ir_bitqueue_add(&worklist, successor);
1537 }
1538 successor = _ir_skip_empty_blocks(ctx, successor);
1539 IR_ASSERT(edges_count < max_edges_count);
1540 edges[edges_count].from = b;
1541 edges[edges_count].to = successor;
1542 edges[edges_count].freq = freq;
1543 edges_count++;
1544 }
1545 }
1546 }
1547 }
1548 ir_bitqueue_free(&worklist);
1549 ir_mem_free(visited);
1550
1551 /* 2. Sort EDGEs according to their frequencies */
1552 qsort(edges, edges_count, sizeof(ir_edge_info), ir_edge_info_cmp);
1553
1554#ifdef IR_DEBUG
1555 if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1556 ir_dump_edges(ctx, edges_count, edges);
1557 }
1558#endif
1559
1560 /* 3. Process EDGEs in the decreasing frequency order and join the connected chains */
1561 for (e = edges, i = edges_count; i > 0; e++, i--) {
1562 uint32_t dst = chains[e->to].head;
1563 if (dst == e->to) {
1564 uint32_t src = chains[e->from].next;
1565 if (chains[src].head == src) {
1566 IR_ASSERT(src == ir_chain_head(chains, e->from) && chains[src].tail == e->from);
1567 if (src != dst) {
1568 ir_join_chains(chains, src, dst);
1569 } else if (ctx->cfg_blocks[e->from].successors_count < 2) {
1570 /* Try to rotate the loop chain so that it ends with a conditional branch */
1571 uint32_t tail = e->from;
1572 uint32_t prev = src;
1573 uint32_t next = chains[prev].next;
1574 uint32_t best = 0;
1575
1576 while (prev != tail) {
1577 if (ctx->ir_base[ctx->cfg_blocks[prev].end].op == IR_IF) {
1578 if (ctx->ir_base[ctx->cfg_blocks[prev].start].op == IR_LOOP_BEGIN
1579 && ctx->cfg_blocks[prev].loop_depth > 1) {
1580 best = next;
1581 break;
1582 } else if (!best || bb_freq[next] < bb_freq[best]) {
1583 /* Find the successor of IF with the least frequency */
1584 best = next;
1585 }
1586 }
1587 prev = next;
1588 next = chains[next].next;
1589 }
1590 if (best) {
1591 /* change the head of the chain */
1592 chains[src].head = best;
1593 chains[best].head = best;
1594 }
1595 }
1596#if !IR_DEBUG_BB_SCHEDULE_GRAPH
1597 e->from = 0; /* reset "from" to avoid check on step #5 */
1598#endif
1599 }
1600 }
1601 }
1602
1603#if IR_DEBUG_BB_SCHEDULE_GRAPH
1604 if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1605 ir_dump_cfg_freq_graph(ctx, bb_freq, edges_count, edges, chains);
1606 }
1607#endif
1608
1609 ir_mem_free(bb_freq);
1610
1611#ifdef IR_DEBUG
1612 if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1613 ir_dump_chains(ctx, chains);
1614 }
1615#endif
1616
1617 /* 4. Merge empty entry blocks */
1618 if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && ctx->entries_count) {
1619 for (i = 0; i < ctx->entries_count; i++) {
1620 b = ctx->entries[i];
1622 if (b && chains[b].head == b && chains[b].tail == b) {
1623 bb = &ctx->cfg_blocks[b];
1624 if (bb->flags & IR_BB_EMPTY) {
1625 uint32_t successor;
1626
1627 do {
1628 IR_ASSERT(bb->successors_count == 1);
1629 successor = ctx->cfg_edges[bb->successors];
1630 bb = &ctx->cfg_blocks[successor];
1631 } while ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY);
1632 IR_ASSERT(chains[successor].head);
1633 ir_insert_chain_before(chains, b, successor);
1634 }
1635 }
1636 }
1637
1638#ifdef IR_DEBUG
1639 if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1640 ir_dump_chains(ctx, chains);
1641 }
1642#endif
1643 }
1644
1645 /* 5. Align loop headers */
1646 for (b = 1; b <= ctx->cfg_blocks_count; b++) {
1647 if (chains[b].head == b) {
1648 bb = &ctx->cfg_blocks[b];
1649 if (bb->loop_depth) {
1650 if ((bb->flags & IR_BB_LOOP_HEADER) || ir_chain_head(chains, bb->loop_header) == b) {
1651 bb->flags |= IR_BB_ALIGN_LOOP;
1652 }
1653 }
1654 }
1655 }
1656
1657 /* 6. Group chains according to the most frequent edge between them */
1658 // TODO: Try to find a better heuristic
1659 for (e = edges, i = edges_count; i > 0; e++, i--) {
1660#if !IR_DEBUG_BB_SCHEDULE_GRAPH
1661 if (!e->from) continue;
1662#endif
1663 uint32_t src = ir_chain_head(chains, e->from);
1664 uint32_t dst = ir_chain_head(chains, e->to);
1665 if (src != dst) {
1666 if (dst == 1) {
1667 ir_join_chains(chains, dst, src);
1668 } else {
1669 ir_join_chains(chains, src, dst);
1670 }
1671 }
1672 }
1673
1674#ifdef IR_DEBUG
1675 if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1676 ir_dump_chains(ctx, chains);
1677 }
1678#endif
1679
1680 /* 7. Form a final BB order */
1681 count = 0;
1682 for (b = 1; b <= ctx->cfg_blocks_count; b++) {
1683 if (chains[b].head == b) {
1684 uint32_t tail = chains[b].tail;
1685 uint32_t i = b;
1686 while (1) {
1687 count++;
1688 ctx->cfg_schedule[count] = i;
1689 if (i == tail) break;
1690 i = chains[i].next;
1691 }
1692 }
1693 }
1694
1695 IR_ASSERT(ctx->cfg_schedule + count == schedule_end);
1696 ctx->cfg_schedule[ctx->cfg_blocks_count + 1] = 0;
1697
1698 ir_mem_free(edges);
1699 ir_mem_free(chains);
1700
1701 return 1;
1702}
1703
1704/* A variation of "Top-down Positioning" algorithm described by
1705 * Karl Pettis and Robert C. Hansen "Profile Guided Code Positioning"
1706 */
1707static int ir_schedule_blocks_top_down(ir_ctx *ctx)
1708{
1709 ir_bitqueue blocks;
1710 uint32_t b, best_successor, last_non_empty;
1711 ir_block *bb, *best_successor_bb;
1712 ir_insn *insn;
1713 uint32_t *list, *schedule_end;
1714 uint32_t count = 0;
1715
1716 ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1);
1717 blocks.pos = 0;
1718 list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
1719 list[ctx->cfg_blocks_count + 1] = 0;
1720 schedule_end = list + ctx->cfg_blocks_count;
1721 for (b = 1; b <= ctx->cfg_blocks_count; b++) {
1722 ir_bitset_incl(blocks.set, b);
1723 }
1724
1725 while ((b = ir_bitqueue_pop(&blocks)) != (uint32_t)-1) {
1726 bb = &ctx->cfg_blocks[b];
1727 /* Start trace */
1728 last_non_empty = 0;
1729 do {
1730 if (UNEXPECTED(bb->flags & IR_BB_PREV_EMPTY_ENTRY) && ir_bitqueue_in(&blocks, b - 1)) {
1731 /* Schedule the previous empty ENTRY block before this one */
1732 uint32_t predecessor = b - 1;
1733
1734 ir_bitqueue_del(&blocks, predecessor);
1735 count++;
1736 list[count] = predecessor;
1737 }
1738 if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
1739 /* move empty blocks to the end */
1740 *schedule_end = b;
1741 schedule_end--;
1742 } else {
1743 count++;
1744 list[count] = b;
1745 last_non_empty = b;
1746 }
1747 best_successor_bb = NULL;
1748 if (bb->successors_count == 1) {
1749 best_successor = ctx->cfg_edges[bb->successors];
1750 if (ir_bitqueue_in(&blocks, best_successor)) {
1751 best_successor_bb = &ctx->cfg_blocks[best_successor];
1752 }
1753 } else if (bb->successors_count > 1) {
1754 uint32_t prob, best_successor_prob;
1755 uint32_t *p, successor;
1756 ir_block *successor_bb;
1757
1758 for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) {
1759 successor = *p;
1760 if (ir_bitqueue_in(&blocks, successor)) {
1761 successor_bb = &ctx->cfg_blocks[successor];
1762 insn = &ctx->ir_base[successor_bb->start];
1763 if (insn->op == IR_IF_TRUE || insn->op == IR_IF_FALSE) {
1764 prob = insn->op2;
1765 if (!prob) {
1766 prob = 100 / bb->successors_count;
1767 if (!(successor_bb->flags & IR_BB_EMPTY)) {
1768 prob++;
1769 }
1770 }
1771 } else if (insn->op == IR_CASE_DEFAULT) {
1772 prob = insn->op2;
1773 if (!prob) {
1774 prob = 100 / bb->successors_count;
1775 }
1776 } else if (insn->op == IR_CASE_VAL) {
1777 prob = insn->op3;
1778 if (!prob) {
1779 prob = 100 / bb->successors_count;
1780 }
1781 } else if (insn->op == IR_ENTRY) {
1782 if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (successor_bb->flags & IR_BB_EMPTY)) {
1783 prob = 99; /* prefer empty ENTRY block to go first */
1784 } else {
1785 prob = 1;
1786 }
1787 } else {
1788 prob = 100 / bb->successors_count;
1789 }
1790 if (!best_successor_bb
1791 || successor_bb->loop_depth > best_successor_bb->loop_depth
1792 || prob > best_successor_prob) {
1793 best_successor = successor;
1794 best_successor_bb = successor_bb;
1795 best_successor_prob = prob;
1796 }
1797 }
1798 }
1799 }
1800 if (!best_successor_bb) {
1801 /* Try to continue trace using the other successor of the last IF */
1802 if ((bb->flags & IR_BB_EMPTY) && last_non_empty) {
1803 bb = &ctx->cfg_blocks[last_non_empty];
1804 if (bb->successors_count == 2 && ctx->ir_base[bb->end].op == IR_IF) {
1805 b = ctx->cfg_edges[bb->successors];
1806
1807 if (!ir_bitqueue_in(&blocks, b)) {
1808 b = ctx->cfg_edges[bb->successors + 1];
1809 }
1810 if (ir_bitqueue_in(&blocks, b)) {
1811 bb = &ctx->cfg_blocks[b];
1812 ir_bitqueue_del(&blocks, b);
1813 continue;
1814 }
1815 }
1816 }
1817 /* End trace */
1818 break;
1819 }
1820 b = best_successor;
1821 bb = best_successor_bb;
1822 ir_bitqueue_del(&blocks, b);
1823 } while (1);
1824 }
1825
1826 IR_ASSERT(list + count == schedule_end);
1827 ctx->cfg_schedule = list;
1828 ir_bitqueue_free(&blocks);
1829
1830 return 1;
1831}
1832
1833int ir_schedule_blocks(ir_ctx *ctx)
1834{
1835 ir_ref ref;
1836
1837 if (ctx->cfg_blocks_count <= 2) {
1838 return 1;
1839 }
1840
1841 /* Mark "stop" blocks terminated with UNREACHABLE as "unexpected" */
1842 ref = ctx->ir_base[1].op1;
1843 while (ref) {
1844 ir_insn *insn = &ctx->ir_base[ref];
1845
1846 if (insn->op == IR_UNREACHABLE && ctx->ir_base[insn->op1].op != IR_TAILCALL) {
1847 ir_block *bb = &ctx->cfg_blocks[ctx->cfg_map[ref]];
1848 uint32_t n = bb->predecessors_count;
1849
1850 if (n == 1) {
1851 ir_insn *start_insn = &ctx->ir_base[bb->start];
1852 if (start_insn->op == IR_IF_TRUE
1853 || start_insn->op == IR_IF_FALSE
1854 || start_insn->op == IR_CASE_DEFAULT) {
1855 if (!start_insn->op2) start_insn->op2 = 1;
1856 } else if (start_insn->op == IR_CASE_VAL) {
1857 if (!start_insn->op3) start_insn->op3 = 1;
1858 }
1859 } else if (n > 1) {
1860 uint32_t *p = &ctx->cfg_edges[bb->predecessors];
1861
1862 for (; n > 0; p++, n--) {
1863 bb = &ctx->cfg_blocks[*p];
1864 if (bb->predecessors_count == 1) {
1865 ir_insn *start_insn = &ctx->ir_base[bb->start];
1866 if (start_insn->op == IR_IF_TRUE
1867 || start_insn->op == IR_IF_FALSE
1868 || start_insn->op == IR_CASE_DEFAULT) {
1869 if (!start_insn->op2) start_insn->op2 = 1;
1870 } else if (start_insn->op == IR_CASE_VAL) {
1871 if (!start_insn->op3) start_insn->op3 = 1;
1872 }
1873 }
1874 }
1875 }
1876 }
1877 ref = insn->op3;
1878 }
1879
1880 /* The bottom-up Pettis-Hansen algorithm is expensive (O(n^3)),
1881 * so use it only for relatively small functions.
1882 *
1883 * TODO: make the choice between the top-down and bottom-up algorithms configurable
1884 */
1885 if (UNEXPECTED(ctx->flags2 & IR_IRREDUCIBLE_CFG) || ctx->cfg_blocks_count > 256) {
1886 return ir_schedule_blocks_top_down(ctx);
1887 } else {
1888 return ir_schedule_blocks_bottom_up(ctx);
1889 }
1890}
1891
1892/* JMP target optimisation */
1893uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b)
1894{
1895 return _ir_skip_empty_blocks(ctx, b);
1896}
1897
1898uint32_t ir_next_block(const ir_ctx *ctx, uint32_t b)
1899{
1900 ir_block *bb;
1901
1902 if (ctx->cfg_schedule) {
1903 uint32_t ret = ctx->cfg_schedule[++b];
1904
1905 /* Check for empty ENTRY block */
1906 while (ret && ((ctx->cfg_blocks[ret].flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY)) {
1907 ret = ctx->cfg_schedule[++b];
1908 }
1909 return ret;
1910 }
1911
1912 b++;
1913 while (1) {
1914 if (b > ctx->cfg_blocks_count) {
1915 return 0;
1916 }
1917
1918 bb = &ctx->cfg_blocks[b];
1919
1920 if ((bb->flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY) {
1921 b++;
1922 } else {
1923 break;
1924 }
1925 }
1926 return b;
1927}
1928
1929void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block)
1930{
1931 ir_block *bb;
1932 uint32_t *p, use_block;
1933
1934 *true_block = 0;
1935 *false_block = 0;
1936 bb = &ctx->cfg_blocks[b];
1937 IR_ASSERT(ctx->ir_base[bb->end].op == IR_IF);
1938 IR_ASSERT(bb->successors_count == 2);
1939 p = &ctx->cfg_edges[bb->successors];
1940 use_block = *p;
1941 if (ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_TRUE) {
1942 *true_block = ir_skip_empty_target_blocks(ctx, use_block);
1943 use_block = *(p+1);
1944 IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_FALSE);
1945 *false_block = ir_skip_empty_target_blocks(ctx, use_block);
1946 } else {
1947 IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_FALSE);
1948 *false_block = ir_skip_empty_target_blocks(ctx, use_block);
1949 use_block = *(p+1);
1950 IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_TRUE);
1951 *true_block = ir_skip_empty_target_blocks(ctx, use_block);
1952 }
1953 IR_ASSERT(*true_block && *false_block);
1954}