php-internal-docs 8.4.8
Unofficial docs for php/php-src
ir_gcm.c
Go to the documentation of this file.
1/*
2 * IR - Lightweight JIT Compilation Framework
3 * (GCM - Global Code Motion and Scheduler)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 *
7 * The GCM algorithm is based on Cliff Click's publication
8 * See: C. Click. "Global code motion, global value numbering" Submitted to PLDI'95.
9 */
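/*
 * Rough overview of this file:
 *  - ir_gcm() pins control nodes and control-dependent nodes (PARAM, VAR,
 *    PHI, PI) to their basic blocks, then schedules the remaining data nodes.
 *  - ir_gcm_schedule_early() places a node in the block of its deepest-placed
 *    input, i.e. the earliest point in the dominator tree where all of its
 *    inputs are available.
 *  - ir_gcm_schedule_late() computes the dominator-tree LCA of all use blocks
 *    and, together with ir_gcm_select_best_block(), moves the node to the
 *    block with the smallest loop depth between the early and late placement.
 *  - ir_schedule() linearizes the scheduled nodes inside each basic block and
 *    renumbers the IR.
 */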
10
11#include "ir.h"
12#include "ir_private.h"
13
14#define IR_GCM_IS_SCHEDULED_EARLY(b) (((int32_t)(b)) < 0)
15#define IR_GCM_EARLY_BLOCK(b) ((uint32_t)-((int32_t)(b)))
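/*
 * During scheduling, ctx->cfg_map[ref] holds the *negated* block number for
 * nodes that have only been scheduled early. IR_GCM_IS_SCHEDULED_EARLY()
 * tests for this encoding and IR_GCM_EARLY_BLOCK() decodes it back into a
 * regular block number; the late pass replaces it with the final block.
 */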
16
17#define IR_GCM_SPLIT 1
18#define IR_SCHEDULE_SWAP_OPS 1
19
20static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late)
21{
22 ir_ref n, *p, input;
23 ir_insn *insn;
24 uint32_t dom_depth;
25 uint32_t b, result;
26
27 insn = &ctx->ir_base[ref];
28
29 IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
30 IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
31
32 result = 1;
33 dom_depth = 0;
34
35 n = insn->inputs_count;
36 for (p = insn->ops + 1; n > 0; p++, n--) {
37 input = *p;
38 if (input > 0) {
 39 b = ctx->cfg_map[input];
 40 if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
41 b = IR_GCM_EARLY_BLOCK(b);
42 } else if (!b) {
43 b = ir_gcm_schedule_early(ctx, input, queue_late);
44 }
45 if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
46 dom_depth = ctx->cfg_blocks[b].dom_depth;
47 result = b;
48 }
49 }
50 }
51
 52 ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result);
 53 ir_list_push_unchecked(queue_late, ref);
54 return result;
55}
56
57/* Last Common Ancestor */
58static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2)
59{
60 uint32_t dom_depth;
61
62 dom_depth = ctx->cfg_blocks[b2].dom_depth;
63 while (ctx->cfg_blocks[b1].dom_depth > dom_depth) {
64 b1 = ctx->cfg_blocks[b1].dom_parent;
65 }
66 dom_depth = ctx->cfg_blocks[b1].dom_depth;
67 while (ctx->cfg_blocks[b2].dom_depth > dom_depth) {
68 b2 = ctx->cfg_blocks[b2].dom_parent;
69 }
70 while (b1 != b2) {
71 b1 = ctx->cfg_blocks[b1].dom_parent;
72 b2 = ctx->cfg_blocks[b2].dom_parent;
73 }
74 return b2;
75}
76
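/*
 * Given the dominator-tree LCA of all uses, walk up the dominator tree
 * towards the early placement and prefer the candidate block with the
 * smallest loop depth, effectively hoisting loop-invariant code. Hoisting
 * stops at loops entered through an OSR ENTRY (unless the value has a
 * binding that allows restoring it) and is skipped when the target loop has
 * no pre-header block.
 */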
77static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
78{
79 ir_block *bb = &ctx->cfg_blocks[lca];
80 uint32_t loop_depth = bb->loop_depth;
81 uint32_t flags, best, b;
82
83 if (!loop_depth) {
84 return lca;
85 }
86
87#if 0 /* This is not necessary anymore. Conditions may be fused with IF across BBs. */
88 if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) {
89 ir_use_list *use_list = &ctx->use_lists[ref];
90
91 if (use_list->count == 1) {
92 ir_ref use = ctx->use_edges[use_list->refs];
93 ir_insn *insn = &ctx->ir_base[use];
94 if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
95 /* Don't hoist invariant comparison */
96 return lca;
97 }
98 }
99 }
100#endif
101
102 flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
 103 if ((flags & IR_BB_LOOP_WITH_ENTRY)
 104 && !(ctx->binding && ir_binding_find(ctx, ref))) {
105 /* Don't move loop invariant code across an OSR ENTRY if we can't restore it */
106 return lca;
107 }
108
109 best = b = lca;
110 do {
111 b = bb->dom_parent;
112 bb = &ctx->cfg_blocks[b];
113 if (bb->loop_depth < loop_depth) {
114 if (!bb->loop_depth) {
115#if 1
116 /* Avoid LICM if LOOP doesn't have a pre-header block */
117 ir_block *loop_bb = &ctx->cfg_blocks[best];
118
119 if (!(loop_bb->flags & IR_BB_LOOP_HEADER)) {
120 loop_bb = &ctx->cfg_blocks[loop_bb->loop_header];
121 }
122 if (loop_bb->predecessors_count > 2) {
123 int n = loop_bb->predecessors_count;
124 uint32_t *p = ctx->cfg_edges + loop_bb->predecessors;
125
126 while (n && *p != b) {
127 n--; p++;
128 }
129 if (!n) {
130 break;
131 }
132 }
133#endif
134 best = b;
135 break;
136 }
137 flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
 138 if ((flags & IR_BB_LOOP_WITH_ENTRY)
 139 && !(ctx->binding && ir_binding_find(ctx, ref))) {
140 break;
141 }
142 loop_depth = bb->loop_depth;
143 best = b;
144 }
145 } while (b != ctx->cfg_map[ref]);
146
147 return best;
148}
149
150#if IR_GCM_SPLIT
151/* Partially Dead Code Elimination through splitting the node and sinking the clones
152 *
153 * This code is based on Benedikt Meurer's idea first implemented in V8.
154 * See: https://codereview.chromium.org/899433005
155 */
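/*
 * The splitting below proceeds roughly in three steps:
 *  1. Compute the set of blocks where the node is TOTALLY_USEFUL: start from
 *     the blocks with real uses and iteratively add predecessors whose
 *     successors are all already in the set. If the scheduled block itself
 *     ends up in the set, the node is not partially dead and nothing is done.
 *  2. Partition the uses: each use climbs from its block up the dominator
 *     tree while the immediate dominator is still TOTALLY_USEFUL; uses that
 *     end up at the same block share one clone.
 *  3. Emit one clone per partition, rewire the uses to their clone and
 *     rebuild the affected use lists and the CFG mapping.
 */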
156
157typedef struct _ir_gcm_split_data {
158 ir_sparse_set totally_useful;
159 ir_list worklist;
160} ir_gcm_split_data;
 161
162static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
163{
164 uint32_t *p, i, n = bb->predecessors_count;
165
166 IR_ASSERT(n > 0);
167 p = ctx->cfg_edges + bb->predecessors;
168 do {
169 i = *p;
170 if (!ir_sparse_set_in(&data->totally_useful, i)) {
171 ir_list_push(&data->worklist, i);
172 }
173 p++;
174 n--;
175 } while (n > 0);
176}
177
178static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
179{
180 uint32_t *p, i, n = bb->successors_count;
181
182 if (n <= 1) {
183 IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors]));
184 return 1;
185 }
186
187 p = ctx->cfg_edges + bb->successors;
188 do {
189 i = *p;
190 if (!ir_sparse_set_in(&data->totally_useful, i)) {
191 return 0;
192 }
193 p++;
194 n--;
195 } while (n > 0);
196
197 return 1;
198}
199
200static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
201{
202 ir_use_list *use_list;
203 ir_insn *insn;
204 ir_ref n, *p, use;
205 uint32_t i;
 206 ir_gcm_split_data *data = ctx->data;
 207
208 IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count);
209
210 /* 1. Find a set of blocks where the node is TOTALLY_USEFUL (not PARTIALLY_DEAD)
211 * 1.1. Collect the blocks where the node is really USED.
212 */
213 ir_sparse_set_clear(&data->totally_useful);
214
215 use_list = &ctx->use_lists[ref];
216 n = use_list->count;
217 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
218 use = *p;
219 insn = &ctx->ir_base[use];
220 if (insn->op == IR_PHI) {
221 ir_ref *p = insn->ops + 2; /* PHI data inputs */
222 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
223 ir_ref n = insn->inputs_count - 1;
224
225 for (;n > 0; p++, q++, n--) {
226 if (*p == ref) {
227 i = ctx->cfg_map[*q];
228 IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
229 if (!ir_sparse_set_in(&data->totally_useful, i)) {
230 if (i == b) return 0; /* node is totally-useful in the scheduled block */
231 ir_sparse_set_add(&data->totally_useful, i);
232 }
233 }
234 }
235 } else {
236 i = ctx->cfg_map[use];
237 if (!i) {
238 continue;
239 }
240 IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
241 if (!ir_sparse_set_in(&data->totally_useful, i)) {
242 if (i == b) return 0; /* node is totally-useful in the scheduled block */
243 ir_sparse_set_add(&data->totally_useful, i);
244 }
245 }
246 }
247
248#ifdef IR_DEBUG
249 if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
250 bool first = 1;
251 fprintf(stderr, "*** Split partially dead node d_%d scheduled to BB%d\n", ref, b);
252 IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
253 if (first) {
254 fprintf(stderr, "\td_%d is USED in [BB%d", ref, i);
255 first = 0;
256 } else {
257 fprintf(stderr, ", BB%d", i);
258 }
 259 } IR_SPARSE_SET_FOREACH_END();
 260 fprintf(stderr, "]\n");
261 }
262#endif
263
264 /* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and
 265 * add them into the TOTALLY_USEFUL set if all of their successors are already there.
266 */
267 IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
268 _push_predecessors(ctx, &ctx->cfg_blocks[i], data);
 269 } IR_SPARSE_SET_FOREACH_END();
 270
271 while (ir_list_len(&data->worklist)) {
272 i = ir_list_pop(&data->worklist);
273 if (!ir_sparse_set_in(&data->totally_useful, i)) {
274 ir_block *bb = &ctx->cfg_blocks[i];
275
276 if (_check_successors(ctx, bb, data)) {
277 if (i == b) {
278 /* node is TOTALLY_USEFUL in the scheduled block */
279 ir_list_clear(&data->worklist);
280 return 0;
281 }
282 ir_sparse_set_add(&data->totally_useful, i);
283 _push_predecessors(ctx, bb, data);
284 }
285 }
286 }
287
288 IR_ASSERT(!ir_sparse_set_in(&data->totally_useful, b));
289
290#ifdef IR_DEBUG
291 if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
292 bool first = 1;
293 IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
294 if (first) {
295 fprintf(stderr, "\td_%d is TOTALLY_USEFUL in [BB%d", ref, i);
296 first = 0;
297 } else {
298 fprintf(stderr, ", BB%d", i);
299 }
 300 } IR_SPARSE_SET_FOREACH_END();
 301 fprintf(stderr, "]\n");
302 }
303#endif
304
305 /* 2. Split the USEs into partitions */
306 use_list = &ctx->use_lists[ref];
 307 ir_hashtab hash;
 308 uint32_t j, clone, clones_count = 0, uses_count = 0;
309 struct {
310 ir_ref ref;
311 uint32_t block;
312 uint32_t use_count;
313 uint32_t use;
314 } *clones = ir_mem_malloc(sizeof(*clones) * use_list->count);
315 struct {
316 ir_ref ref;
317 uint32_t block;
318 uint32_t next;
319 } *uses = ir_mem_malloc(sizeof(*uses) * use_list->count);
320
321 ir_hashtab_init(&hash, use_list->count);
322 n = use_list->count;
323 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
324 use = *p;
325 insn = &ctx->ir_base[use];
326 if (insn->op == IR_PHI) {
327 ir_ref *p = insn->ops + 2; /* PHI data inputs */
328 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
329 ir_ref n = insn->inputs_count - 1;
330
331 /* PHIs must be processed once */
332 if (ir_hashtab_find(&hash, -use) != (ir_ref)IR_INVALID_VAL) {
333 continue;
334 }
335 ir_hashtab_add(&hash, -use, IR_NULL);
336 for (;n > 0; p++, q++, n--) {
337 if (*p == ref) {
338 j = i = ctx->cfg_map[*q];
339 while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
340 j = ctx->cfg_blocks[j].idom;
341 }
342 clone = ir_hashtab_find(&hash, j);
343 if (clone == IR_INVALID_VAL) {
344 clone = clones_count++;
345 ir_hashtab_add(&hash, j, clone);
346 clones[clone].block = j;
347 clones[clone].use_count = 0;
348 clones[clone].use = (uint32_t)-1;
349 }
350 uses[uses_count].ref = use;
351 uses[uses_count].block = i;
352 uses[uses_count].next = clones[clone].use;
353 clones[clone].use_count++;
354 clones[clone].use = uses_count++;
355 }
356 }
357 } else {
358 j = i = ctx->cfg_map[use];
359 if (i) {
360 IR_ASSERT(i > 0);
361 while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
362 j = ctx->cfg_blocks[j].idom;
363 }
364 }
365 clone = ir_hashtab_find(&hash, j);
366 if (clone == IR_INVALID_VAL) {
367 clone = clones_count++;
368 ir_hashtab_add(&hash, j, clone);
369 clones[clone].block = j;
370 clones[clone].use_count = 0;
371 clones[clone].use = -1;
372 }
373 uses[uses_count].ref = use;
374 uses[uses_count].block = i;
375 uses[uses_count].next = clones[clone].use;
376 clones[clone].use_count++;
377 clones[clone].use = uses_count++;
378 }
379 }
380
381#ifdef IR_DEBUG
382 if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
383 for (i = 0; i < clones_count; i++) {
384 uint32_t u = clones[i].use;
385
386 fprintf(stderr, "\tCLONE #%d in BB%d USES(%d)=[d_%d/BB%d",
387 i, clones[i].block, clones[i].use_count, uses[u].ref, uses[u].block);
388 u = uses[u].next;
389 while (u != (uint32_t)-1) {
390 fprintf(stderr, ", d_%d/BB%d", uses[u].ref, uses[u].block);
391 u = uses[u].next;
392 }
393 fprintf(stderr, "]\n");
394 }
395 }
396#endif
397
398 /* Create Clones */
399 insn = &ctx->ir_base[ref];
400 clones[0].ref = ref;
401 for (i = 1; i < clones_count; i++) {
402 clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3);
403 insn = &ctx->ir_base[ref];
404 /* Depending on the flags in IR_OPS, these can be references or data. */
405 if (insn->op1 > 0 && insn->inputs_count >= 1) ir_use_list_add(ctx, insn->op1, clone);
406 if (insn->op2 > 0 && insn->inputs_count >= 2) ir_use_list_add(ctx, insn->op2, clone);
407 if (insn->op3 > 0 && insn->inputs_count >= 3) ir_use_list_add(ctx, insn->op3, clone);
408 }
409
 410 /* Reconstruct IR: Update DEF->USE lists, CFG mapping, etc. */
411 ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list));
412 ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t));
413 n = ctx->use_lists[ref].refs;
414 for (i = 0; i < clones_count; i++) {
415 clone = clones[i].ref;
416 if (clones[i].use_count == 1
417 && ctx->cfg_blocks[clones[i].block].loop_depth >= ctx->cfg_blocks[uses[clones[i].use].block].loop_depth) {
418 /* TOTALLY_USEFUL block may be a head of a diamond above the real usage.
419 * Sink it down to the real usage block.
420 * Clones with few uses will be sunk into the LCA block.
421 */
422 clones[i].block = uses[clones[i].use].block;
423 }
424 ctx->cfg_map[clone] = clones[i].block;
425 ctx->use_lists[clone].count = clones[i].use_count;
426 ctx->use_lists[clone].refs = n;
427
428 uint32_t u = clones[i].use;
429 while (u != (uint32_t)-1) {
430 use = uses[u].ref;
431 ctx->use_edges[n++] = use;
432 u = uses[u].next;
433 if (i > 0) {
434 /* replace inputs */
435 ir_insn *insn = &ctx->ir_base[use];
436 ir_ref k, l = insn->inputs_count;
437
438 if (insn->op == IR_PHI) {
439 for (k = 1; k <= l; k++) {
440 if (ir_insn_op(insn, k) == ref) {
441 j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)];
442 if (j != clones[i].block) {
443 uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth;
444 while (ctx->cfg_blocks[j].dom_depth > dom_depth) {
445 j = ctx->cfg_blocks[j].dom_parent;
446 }
447 if (j != clones[i].block) {
448 continue;
449 }
450 }
451 ir_insn_set_op(insn, k, clone);
452 break;
453 }
454 }
455 } else {
456 for (k = 1; k <= l; k++) {
457 if (ir_insn_op(insn, k) == ref) {
458 ir_insn_set_op(insn, k, clone);
459 break;
460 }
461 }
462 }
463 }
464 }
465 }
466
467 ir_mem_free(uses);
468 ir_mem_free(clones);
 469 ir_hashtab_free(&hash);
 470
471#ifdef IR_DEBUG
472 if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
473 ir_check(ctx);
474 }
475#endif
476
477 return 1;
478}
479#endif
480
481#ifdef IR_DEBUG
482static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2)
483{
484 uint32_t b1_depth = ctx->cfg_blocks[b1].dom_depth;
485 const ir_block *bb2 = &ctx->cfg_blocks[b2];
486
487 while (bb2->dom_depth > b1_depth) {
488 b2 = bb2->dom_parent;
489 bb2 = &ctx->cfg_blocks[b2];
490 }
491 return b1 == b2;
492}
493#endif
494
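/*
 * Late scheduling: for a node scheduled early, find the common dominator-tree
 * ancestor (LCA) of all blocks that use it. A use in a PHI contributes the
 * corresponding MERGE predecessor block rather than the PHI's own block.
 * The node is then placed via ir_gcm_select_best_block(), or split first
 * when it turns out to be partially dead.
 */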
495static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b)
496{
497 ir_ref n, use;
498 uint32_t lca = 0;
499
500 IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR);
501 IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI);
502
 503 IR_ASSERT(IR_GCM_IS_SCHEDULED_EARLY(b));
 504 b = IR_GCM_EARLY_BLOCK(b);
505 ctx->cfg_map[ref] = b;
506
507 for (n = 0; n < ctx->use_lists[ref].count; n++) {
508 use = ctx->use_edges[ctx->use_lists[ref].refs + n];
509 b = ctx->cfg_map[use];
 510 if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
 511 ir_gcm_schedule_late(ctx, use, b);
512 b = ctx->cfg_map[use];
513 IR_ASSERT(b != 0);
514 } else if (!b) {
515 continue;
516 } else if (ctx->ir_base[use].op == IR_PHI) {
517 ir_insn *insn = &ctx->ir_base[use];
518 ir_ref *p = insn->ops + 2; /* PHI data inputs */
519 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
520 ir_ref n = insn->inputs_count - 1;
521
522 for (;n > 0; p++, q++, n--) {
523 if (*p == ref) {
524 b = ctx->cfg_map[*q];
525 lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
526 }
527 }
528 continue;
529 }
530 lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
531 }
532
533 IR_ASSERT(lca != 0 && "No Common Ancestor");
534 IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the late");
535
536#if IR_GCM_SPLIT
537 if (ctx->use_lists[ref].count > 1
538 && ir_split_partially_dead_node(ctx, ref, lca)) {
539 return;
540 }
541#endif
542
543 if (lca != ctx->cfg_map[ref]) {
544 b = ir_gcm_select_best_block(ctx, ref, lca);
545
546 ctx->cfg_map[ref] = b;
547
548 /* OVERFLOW is a projection of ADD/SUB/MUL_OV and must be scheduled into the same block */
549 if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) {
550 ir_use_list *use_list = &ctx->use_lists[ref];
551 ir_ref n, *p, use;
552
 553 for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
554 use = *p;
555 if (ctx->ir_base[use].op == IR_OVERFLOW) {
556 ctx->cfg_map[use] = b;
557 break;
558 }
559 }
560 }
561 }
562}
563
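/*
 * Driver of the GCM pass. For a single-block function everything is simply
 * pinned to block 1. Otherwise control nodes and control-dependent nodes
 * (PARAM, VAR, PHI, PI) are pinned to their blocks, the remaining data nodes
 * are scheduled early from their inputs and then late from their uses.
 */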
564int ir_gcm(ir_ctx *ctx)
565{
566 ir_ref k, n, *p, ref;
567 ir_block *bb;
568 ir_list queue_early;
569 ir_list queue_late;
570 uint32_t *_blocks, b;
571 ir_insn *insn, *use_insn;
572 ir_use_list *use_list;
573
574 IR_ASSERT(ctx->cfg_map);
575 _blocks = ctx->cfg_map;
576
577 ir_list_init(&queue_early, ctx->insns_count);
578
579 if (ctx->cfg_blocks_count == 1) {
580 ref = ctx->cfg_blocks[1].end;
581 do {
582 insn = &ctx->ir_base[ref];
583 _blocks[ref] = 1; /* pin to block */
584 if (insn->inputs_count > 1) {
585 /* insn has input data edges */
586 ir_list_push_unchecked(&queue_early, ref);
587 }
588 ref = insn->op1; /* control predecessor */
589 } while (ref != 1); /* IR_START */
590 _blocks[1] = 1; /* pin to block */
591
592 use_list = &ctx->use_lists[1];
593 n = use_list->count;
594 for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
595 ref = *p;
596 use_insn = &ctx->ir_base[ref];
597 if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
598 ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
599 _blocks[ref] = 1; /* pin to block */
600 }
601 }
602
603 /* Place all live nodes to the first block */
604 while (ir_list_len(&queue_early)) {
605 ref = ir_list_pop(&queue_early);
606 insn = &ctx->ir_base[ref];
607 n = insn->inputs_count;
608 for (p = insn->ops + 1; n > 0; p++, n--) {
609 ref = *p;
610 if (ref > 0 && _blocks[ref] == 0) {
611 _blocks[ref] = 1;
612 ir_list_push_unchecked(&queue_early, ref);
613 }
614 }
615 }
616
617 ir_list_free(&queue_early);
618
619 return 1;
620 }
621
622 ir_list_init(&queue_late, ctx->insns_count);
623
 624 /* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */
625 b = ctx->cfg_blocks_count;
626 for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
 627 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
 628 ref = bb->end;
629
630 /* process the last instruction of the block */
631 insn = &ctx->ir_base[ref];
632 _blocks[ref] = b; /* pin to block */
633 if (insn->inputs_count > 1) {
634 /* insn has input data edges */
635 ir_list_push_unchecked(&queue_early, ref);
636 }
637 ref = insn->op1; /* control predecessor */
638
639 while (ref != bb->start) {
640 insn = &ctx->ir_base[ref];
641 _blocks[ref] = b; /* pin to block */
642 if (insn->inputs_count > 1) {
643 /* insn has input data edges */
644 ir_list_push_unchecked(&queue_early, ref);
645 }
646 if (insn->type != IR_VOID) {
 647 IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
 648 }
649 ref = insn->op1; /* control predecessor */
650 }
651
652 /* process the first instruction of the block */
653 _blocks[ref] = b; /* pin to block */
654
655 use_list = &ctx->use_lists[ref];
656 n = use_list->count;
657 if (n > 1) {
658 for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
659 ref = *p;
660 use_insn = &ctx->ir_base[ref];
661 if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
662 bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI;
663 if (EXPECTED(ctx->use_lists[ref].count != 0)) {
664 _blocks[ref] = b; /* pin to block */
665 ir_list_push_unchecked(&queue_early, ref);
666 }
667 } else if (use_insn->op == IR_PARAM) {
668 bb->flags |= IR_BB_HAS_PARAM;
669 _blocks[ref] = b; /* pin to block */
670 } else if (use_insn->op == IR_VAR) {
671 bb->flags |= IR_BB_HAS_VAR;
672 _blocks[ref] = b; /* pin to block */
673 }
674 }
675 }
676 }
677
678 n = ir_list_len(&queue_early);
679 while (n > 0) {
680 n--;
681 ref = ir_list_at(&queue_early, n);
682 insn = &ctx->ir_base[ref];
683 k = insn->inputs_count - 1;
684 for (p = insn->ops + 2; k > 0; p++, k--) {
685 ref = *p;
686 if (ref > 0 && _blocks[ref] == 0) {
687 ir_gcm_schedule_early(ctx, ref, &queue_late);
688 }
689 }
690 }
691
692#ifdef IR_DEBUG
693 if (ctx->flags & IR_DEBUG_GCM) {
694 fprintf(stderr, "GCM Schedule Early\n");
695 for (n = 1; n < ctx->insns_count; n++) {
696 fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
697 }
698 }
699#endif
700
701#if IR_GCM_SPLIT
 702 ir_gcm_split_data data;
 703
704 ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1);
705 ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1);
706 ctx->data = &data;
707#endif
708
709 n = ir_list_len(&queue_late);
710 while (n > 0) {
711 n--;
712 ref = ir_list_at(&queue_late, n);
713 b = ctx->cfg_map[ref];
 714 if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
 715 ir_gcm_schedule_late(ctx, ref, b);
716 }
717 }
718
719#if IR_GCM_SPLIT
720 ir_list_free(&data.worklist);
721 ir_sparse_set_free(&data.totally_useful);
722 ctx->data = NULL;
723#endif
724
725 ir_list_free(&queue_early);
726 ir_list_free(&queue_late);
727
728#ifdef IR_DEBUG
729 if (ctx->flags & IR_DEBUG_GCM) {
730 fprintf(stderr, "GCM Schedule Late\n");
731 for (n = 1; n < ctx->insns_count; n++) {
732 fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
733 }
734 }
735#endif
736
737 return 1;
738}
739
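/*
 * After ir_schedule() renumbers the IR, the binding hash table still refers
 * to the old references. This helper rehashes every surviving entry through
 * the _xlat translation table and drops entries whose key was eliminated.
 */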
740static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat)
741{
742 uint32_t n1, n2, pos;
743 ir_ref key;
744 ir_hashtab_bucket *b1, *b2;
745 ir_hashtab *binding = ctx->binding;
746 uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);
747
748 memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
749 n1 = binding->count;
750 n2 = 0;
751 pos = 0;
752 b1 = binding->data;
753 b2 = binding->data;
754 while (n1 > 0) {
755 key = b1->key;
756 IR_ASSERT(key < ctx->insns_count);
757 if (_xlat[key]) {
758 key = _xlat[key];
759 b2->key = key;
760 if (b1->val > 0) {
761 IR_ASSERT(_xlat[b1->val]);
762 b2->val = _xlat[b1->val];
763 } else {
764 b2->val = b1->val;
765 }
766 key |= binding->mask;
767 b2->next = ((uint32_t*)binding->data)[key];
768 ((uint32_t*)binding->data)[key] = pos;
769 pos += sizeof(ir_hashtab_bucket);
770 b2++;
771 n2++;
772 }
773 b1++;
774 n1--;
775 }
776 binding->count = n2;
777}
778
779IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
780{
781 if (!_xlat[ref]) {
782 _xlat[ref] = ref; /* this is only a "used constant" marker */
783 return 1;
784 }
785 return 0;
786}
787
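/*
 * ir_schedule() linearizes the graph: it builds a doubly-linked order of all
 * scheduled nodes grouped by basic block (_next/_prev), repairs intra-block
 * dependency order, and computes a translation table (_xlat) with the new
 * reference numbers. If nothing actually moved, the existing context is kept;
 * otherwise a fresh ir_ctx is built with compacted constants, instructions,
 * use lists and use edges (commutative operands may also be swapped there,
 * see IR_SCHEDULE_SWAP_OPS).
 */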
788int ir_schedule(ir_ctx *ctx)
789{
790 ir_ctx new_ctx;
791 ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
792 ir_ref *_xlat;
793 ir_ref *edges;
794 ir_ref prev_b_end;
795 uint32_t b, prev_b;
796 uint32_t *_blocks = ctx->cfg_map;
797 ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
798 ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
799 ir_ref _move_down = 0;
800 ir_block *bb;
801 ir_insn *insn, *new_insn;
802 ir_use_list *lists, *use_list, *new_list;
803
804 /* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
805 IR_ASSERT(_blocks[1] == 1);
806 prev_b = 1;
807 prev_b_end = ctx->cfg_blocks[1].end;
808 _prev[1] = 0;
809 _prev[prev_b_end] = 0;
810 for (i = 2, j = 1; i < ctx->insns_count; i++) {
811 b = _blocks[i];
812 IR_ASSERT((int32_t)b >= 0);
813 if (b == prev_b && i <= prev_b_end) {
814 /* add to the end of the list */
815 _next[j] = i;
816 _prev[i] = j;
817 j = i;
818 } else if (b > prev_b) {
819 bb = &ctx->cfg_blocks[b];
820 if (i == bb->start) {
821 IR_ASSERT(bb->end > bb->start);
822 prev_b = b;
823 prev_b_end = bb->end;
824 _prev[bb->end] = 0;
825 /* add to the end of the list */
826 _next[j] = i;
827 _prev[i] = j;
828 j = i;
829 } else {
830 IR_ASSERT(i != bb->end);
831 /* move down late (see the following loop) */
832 _next[i] = _move_down;
833 _move_down = i;
834 }
835 } else if (b) {
836 bb = &ctx->cfg_blocks[b];
837 IR_ASSERT(i != bb->start);
838 if (_prev[bb->end]) {
839 /* move up, insert before the end of the already scheduled BB */
840 k = bb->end;
841 } else {
842 /* move up, insert at the end of the block */
843 k = ctx->cfg_blocks[b + 1].start;
844 }
845 /* insert before "k" */
846 _prev[i] = _prev[k];
847 _next[i] = k;
848 _next[_prev[k]] = i;
849 _prev[k] = i;
850 }
851 }
852 _next[j] = 0;
853
854 while (_move_down) {
855 i = _move_down;
856 _move_down = _next[i];
857 b = _blocks[i];
858 bb = &ctx->cfg_blocks[b];
859 k = _next[bb->start];
860
862 /* insert after the start of the block and all PARAM, VAR, PI, PHI */
863 insn = &ctx->ir_base[k];
864 while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
865 k = _next[k];
866 insn = &ctx->ir_base[k];
867 }
868 }
869
870 /* insert before "k" */
871 _prev[i] = _prev[k];
872 _next[i] = k;
873 _next[_prev[k]] = i;
874 _prev[k] = i;
875 }
876
877#ifdef IR_DEBUG
878 if (ctx->flags & IR_DEBUG_SCHEDULE) {
879 fprintf(stderr, "Before Schedule\n");
880 for (i = 1; i != 0; i = _next[i]) {
881 fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
882 }
883 }
884#endif
885
886 _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
887 _xlat += ctx->consts_count;
888 _xlat[IR_TRUE] = IR_TRUE;
889 _xlat[IR_FALSE] = IR_FALSE;
890 _xlat[IR_NULL] = IR_NULL;
891 _xlat[IR_UNUSED] = IR_UNUSED;
892 insns_count = 1;
893 consts_count = -(IR_TRUE - 1);
894
 895 /* Topological sort according to dependencies inside each basic block */
896 for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
 897 ir_ref start;
 898
 899 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
900 /* Schedule BB start */
901 start = i = bb->start;
902 _xlat[i] = bb->start = insns_count;
903 insn = &ctx->ir_base[i];
904 if (insn->op == IR_CASE_VAL) {
905 IR_ASSERT(insn->op2 < IR_TRUE);
906 consts_count += ir_count_constant(_xlat, insn->op2);
907 }
908 n = insn->inputs_count;
909 insns_count += ir_insn_inputs_to_len(n);
910 i = _next[i];
911 insn = &ctx->ir_base[i];
 912 if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
 913 int count = 0;
914
915 /* Schedule PARAM, VAR, PI */
916 while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
917 _xlat[i] = insns_count;
918 insns_count += 1;
919 i = _next[i];
920 insn = &ctx->ir_base[i];
921 count++;
922 }
923 /* Schedule PHIs */
924 while (insn->op == IR_PHI) {
925 ir_ref j, *p, input;
926
927 _xlat[i] = insns_count;
928 /* Reuse "n" from MERGE and skip first input */
929 insns_count += ir_insn_inputs_to_len(n + 1);
930 for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
931 input = *p;
932 if (input < IR_TRUE) {
933 consts_count += ir_count_constant(_xlat, input);
934 }
935 }
936 i = _next[i];
937 insn = &ctx->ir_base[i];
938 count++;
939 }
940 /* Schedule remaining PHIs */
941 if (UNEXPECTED(count < ctx->use_lists[start].count - 1)) {
942 ir_use_list *use_list = &ctx->use_lists[start];
943 ir_ref *p, count = use_list->count;
944 ir_ref phis = _prev[i];
945
946 for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) {
947 ir_ref use = *p;
948 ir_insn *use_insn = &ctx->ir_base[use];
949 if (!_xlat[use] && (_blocks[use] || use_insn->op == IR_PARAM)) {
950 IR_ASSERT(_blocks[use] == b || use_insn->op == IR_PARAM);
951 if (use_insn->op == IR_PARAM
952 || use_insn->op == IR_VAR
953 || use_insn->op == IR_PI
954 || use_insn->op == IR_PHI) {
955 if (_prev[use] != phis) {
956 /* remove "use" */
957 _prev[_next[use]] = _prev[use];
958 _next[_prev[use]] = _next[use];
959 /* insert "use" after "phis" */
960 _prev[use] = phis;
961 _next[use] = _next[phis];
962 _prev[_next[phis]] = use;
963 _next[phis] = use;
964 }
965 phis = use;
966 _xlat[use] = insns_count;
967 if (use_insn->op == IR_PHI) {
968 ir_ref *q;
969 /* Reuse "n" from MERGE and skip first input */
970 insns_count += ir_insn_inputs_to_len(n + 1);
971 for (j = n, q = use_insn->ops + 2; j > 0; q++, j--) {
972 ir_ref input = *q;
973 if (input < IR_TRUE) {
974 consts_count += ir_count_constant(_xlat, input);
975 }
976 }
977 } else {
978 insns_count += 1;
979 }
980 }
981 }
982 }
983 i = _next[phis];
984 insn = &ctx->ir_base[i];
985 }
986 }
987 if (bb->successors_count > 1) {
988 ir_ref input, j = bb->end;
989 ir_insn *end = &ctx->ir_base[j];
990
991 if (end->op == IR_IF) {
992 /* Move condition closer to IF */
993 input = end->op2;
994 if (input > 0 && _blocks[input] == b && !_xlat[input] && _prev[j] != input) {
995 if (input == i) {
996 i = _next[i];
997 insn = &ctx->ir_base[i];
998 }
999 /* remove "input" */
1000 _prev[_next[input]] = _prev[input];
1001 _next[_prev[input]] = _next[input];
1002 /* insert before "j" */
1003 _prev[input] = _prev[j];
1004 _next[input] = j;
1005 _next[_prev[j]] = input;
1006 _prev[j] = input;
1007 }
1008 }
1009 }
1010 while (i != bb->end) {
1011 ir_ref n, j, *p, input;
1012
1013restart:
1014 n = insn->inputs_count;
1015 for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
1016 input = *p;
1017 if (!_xlat[input]) {
1018 /* input is not scheduled yet */
1019 if (input > 0) {
1020 if (_blocks[input] == b) {
1021 /* "input" should be before "i" to satisfy dependency */
1022#ifdef IR_DEBUG
1023 if (ctx->flags & IR_DEBUG_SCHEDULE) {
1024 fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
1025 }
1026#endif
1027 /* remove "input" */
1028 _prev[_next[input]] = _prev[input];
1029 _next[_prev[input]] = _next[input];
1030 /* insert before "i" */
1031 _prev[input] = _prev[i];
1032 _next[input] = i;
1033 _next[_prev[i]] = input;
1034 _prev[i] = input;
1035 /* restart from "input" */
1036 i = input;
1037 insn = &ctx->ir_base[i];
1038 goto restart;
1039 }
1040 } else if (input < IR_TRUE) {
1041 consts_count += ir_count_constant(_xlat, input);
1042 }
1043 }
1044 }
1045 _xlat[i] = insns_count;
1046 insns_count += ir_insn_inputs_to_len(n);
1047 i = _next[i];
1048 insn = &ctx->ir_base[i];
1049 }
1050 /* Schedule BB end */
1051 _xlat[i] = bb->end = insns_count;
1052 insns_count++;
1053 if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
1054 if (insn->op2 < IR_TRUE) {
1055 consts_count += ir_count_constant(_xlat, insn->op2);
1056 }
1057 }
1058 }
1059
1060#ifdef IR_DEBUG
1061 if (ctx->flags & IR_DEBUG_SCHEDULE) {
1062 fprintf(stderr, "After Schedule\n");
1063 for (i = 1; i != 0; i = _next[i]) {
1064 fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]);
1065 }
1066 }
1067#endif
1068
1069#if 1
1070 /* Check if scheduling didn't make any modifications */
1071 if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
1072 bool changed = 0;
1073
1074 for (i = 1; i != 0; i = _next[i]) {
1075 if (_xlat[i] != i) {
1076 changed = 1;
1077 break;
1078 }
1079 }
1080 if (!changed) {
1081 _xlat -= ctx->consts_count;
1082 ir_mem_free(_xlat);
1083 ir_mem_free(_next);
1084
1085 ctx->prev_ref = _prev;
1086 ctx->flags2 |= IR_LINEAR;
1087 ir_truncate(ctx);
1088
1089 return 1;
1090 }
1091 }
1092#endif
1093
1094 ir_mem_free(_prev);
1095
1096 ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
1097 new_ctx.insns_count = insns_count;
1098 new_ctx.flags2 = ctx->flags2;
1099 new_ctx.ret_type = ctx->ret_type;
1100 new_ctx.mflags = ctx->mflags;
1101 new_ctx.spill_base = ctx->spill_base;
 1102 new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
 1103 new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
 1104 new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
 1105 new_ctx.fixed_regset = ctx->fixed_regset;
1106 new_ctx.fixed_save_regset = ctx->fixed_save_regset;
1107 new_ctx.entries_count = ctx->entries_count;
1108#if defined(IR_TARGET_AARCH64)
1109 new_ctx.deoptimization_exits = ctx->deoptimization_exits;
1110 new_ctx.get_exit_addr = ctx->get_exit_addr;
1111 new_ctx.get_veneer = ctx->get_veneer;
1112 new_ctx.set_veneer = ctx->set_veneer;
1113#endif
1114 new_ctx.loader = ctx->loader;
1115
1116 /* Copy constants */
1117 if (consts_count == ctx->consts_count) {
1118 new_ctx.consts_count = consts_count;
1119 ref = 1 - consts_count;
1120 insn = &ctx->ir_base[ref];
1121 new_insn = &new_ctx.ir_base[ref];
1122
1123 memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
1124 if (ctx->strtab.data) {
1125 while (ref != IR_TRUE) {
1126 if (new_insn->op == IR_FUNC_ADDR) {
1127 if (new_insn->proto) {
1128 size_t len;
1129 const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
1130 new_insn->proto = ir_strl(&new_ctx, proto, len);
1131 }
1132 } else if (new_insn->op == IR_FUNC) {
1133 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
1134 if (new_insn->proto) {
1135 size_t len;
1136 const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
1137 new_insn->proto = ir_strl(&new_ctx, proto, len);
1138 }
1139 } else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) {
1140 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
1141 }
1142 new_insn++;
1143 ref++;
1144 }
1145 }
1146 } else {
1147 new_ref = -new_ctx.consts_count;
1148 new_insn = &new_ctx.ir_base[new_ref];
1149 for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
1150 if (!_xlat[ref]) {
1151 continue;
1152 }
1153 new_insn->optx = insn->optx;
1154 new_insn->prev_const = 0;
1155 if (insn->op == IR_FUNC_ADDR) {
1156 new_insn->val.u64 = insn->val.u64;
1157 if (insn->proto) {
1158 size_t len;
1159 const char *proto = ir_get_strl(ctx, insn->proto, &len);
1160 new_insn->proto = ir_strl(&new_ctx, proto, len);
1161 } else {
1162 new_insn->proto = 0;
1163 }
1164 } else if (insn->op == IR_FUNC) {
1165 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
1166 if (insn->proto) {
1167 size_t len;
1168 const char *proto = ir_get_strl(ctx, insn->proto, &len);
1169 new_insn->proto = ir_strl(&new_ctx, proto, len);
1170 } else {
1171 new_insn->proto = 0;
1172 }
1173 } else if (insn->op == IR_SYM || insn->op == IR_STR) {
1174 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
1175 } else {
1176 new_insn->val.u64 = insn->val.u64;
1177 }
1178 _xlat[ref] = new_ref;
1179 new_ref--;
1180 new_insn--;
1181 }
1182 new_ctx.consts_count = -new_ref;
1183 }
1184
1185 new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
1186 new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
1187 new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
1188 new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
1189
1190 /* Copy instructions, use lists and use edges */
1191 prev_ref = 0;
1192 use_edges_count = 0;
1193 for (i = 1; i != 0; i = _next[i]) {
1194 new_ref = _xlat[i];
1195 new_ctx.cfg_map[new_ref] = _blocks[i];
1196 _prev[new_ref] = prev_ref;
1197 prev_ref = new_ref;
1198
1199 use_list = &ctx->use_lists[i];
1200 n = use_list->count;
1201 k = 0;
1202 if (n == 1) {
1203 ref = ctx->use_edges[use_list->refs];
1204 if (_xlat[ref]) {
1205 *edges = _xlat[ref];
1206 edges++;
1207 k = 1;
1208 }
1209 } else {
1210 p = &ctx->use_edges[use_list->refs];
1211 while (n--) {
1212 ref = *p;
1213 if (_xlat[ref]) {
1214 *edges = _xlat[ref];
1215 edges++;
1216 k++;
1217 }
1218 p++;
1219 }
1220 }
1221 new_list = &lists[new_ref];
1222 new_list->refs = use_edges_count;
1223 use_edges_count += k;
1224 new_list->count = k;
1225
1226 insn = &ctx->ir_base[i];
1227 new_insn = &new_ctx.ir_base[new_ref];
1228
1229 new_insn->optx = insn->optx;
1230 n = new_insn->inputs_count;
1231 switch (n) {
1232 case 0:
1233 new_insn->op1 = insn->op1;
1234 new_insn->op2 = insn->op2;
1235 new_insn->op3 = insn->op3;
1236 break;
1237 case 1:
1238 new_insn->op1 = _xlat[insn->op1];
1239 if (new_insn->op == IR_PARAM || insn->op == IR_VAR) {
1240 new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2));
1241 } else if (new_insn->op == IR_PROTO) {
1242 size_t len;
1243 const char *proto = ir_get_strl(ctx, insn->op2, &len);
1244 new_insn->op2 = ir_strl(&new_ctx, proto, len);
1245 } else {
1246 new_insn->op2 = insn->op2;
1247 }
1248 new_insn->op3 = insn->op3;
1249 break;
1250 case 2:
1251 new_insn->op1 = _xlat[insn->op1];
1252 new_insn->op2 = _xlat[insn->op2];
1253 new_insn->op3 = insn->op3;
1254#if IR_SCHEDULE_SWAP_OPS
1255 /* Swap operands according to folding rules */
1256 if (new_insn->op1 < new_insn->op2) {
1257 switch (new_insn->op) {
1258 case IR_EQ:
1259 case IR_NE:
1260 case IR_ADD:
1261 case IR_MUL:
1262 case IR_ADD_OV:
1263 case IR_MUL_OV:
1264 case IR_OR:
1265 case IR_AND:
1266 case IR_XOR:
1267 case IR_MIN:
1268 case IR_MAX:
1269 SWAP_REFS(new_insn->op1, new_insn->op2);
1270 break;
1271 case IR_LT:
1272 case IR_GE:
1273 case IR_LE:
1274 case IR_GT:
1275 case IR_ULT:
1276 case IR_UGE:
1277 case IR_ULE:
1278 case IR_UGT:
1279 SWAP_REFS(new_insn->op1, new_insn->op2);
1280 new_insn->op ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */
1281 break;
1282 }
1283 }
1284#endif
1285 break;
1286 case 3:
1287 new_insn->op1 = _xlat[insn->op1];
1288 new_insn->op2 = _xlat[insn->op2];
1289 new_insn->op3 = _xlat[insn->op3];
1290 break;
1291 default:
1292 for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
1293 *q = _xlat[*p];
1294 }
1295 break;
1296 }
1297 }
1298
1299 /* Update list of terminators (IR_OPND_CONTROL_REF) */
1300 insn = &new_ctx.ir_base[1];
1301 ref = insn->op1;
1302 if (ref) {
1303 insn->op1 = ref = _xlat[ref];
1304 while (1) {
1305 insn = &new_ctx.ir_base[ref];
1306 ref = insn->op3;
1307 if (!ref) {
1308 break;
1309 }
1310 insn->op3 = ref = _xlat[ref];
1311 }
1312 }
1313
1314 IR_ASSERT(ctx->use_edges_count >= use_edges_count);
1315 new_ctx.use_edges_count = use_edges_count;
1316 new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
1317
1318 if (ctx->binding) {
1319 ir_xlat_binding(ctx, _xlat);
1320 new_ctx.binding = ctx->binding;
1321 ctx->binding = NULL;
1322 }
1323
1324 _xlat -= ctx->consts_count;
1325 ir_mem_free(_xlat);
1326
1327 new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
1328 new_ctx.cfg_edges_count = ctx->cfg_edges_count;
1329 new_ctx.cfg_blocks = ctx->cfg_blocks;
1330 new_ctx.cfg_edges = ctx->cfg_edges;
1331 ctx->cfg_blocks = NULL;
1332 ctx->cfg_edges = NULL;
1333 ir_code_buffer *saved_code_buffer = ctx->code_buffer;
1334
1335 ir_free(ctx);
1336 IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
1337 IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
1338 memcpy(ctx, &new_ctx, sizeof(ir_ctx));
1339 ctx->code_buffer = saved_code_buffer;
1340 ctx->flags2 |= IR_LINEAR;
1341
1342 ir_mem_free(_next);
1343
1344 return 1;
1345}
1346
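/*
 * Fills ctx->prev_ref so that, for every instruction, the reference of the
 * preceding instruction in the linearized order can be looked up directly
 * (the counterpart of stepping forward with ir_insn_len()).
 */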
1347void ir_build_prev_refs(ir_ctx *ctx)
1348{
1349 uint32_t b;
1350 ir_block *bb;
1351 ir_ref i, n, prev;
1352 ir_insn *insn;
1353
1354 ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
1355 prev = 0;
1356 for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
 1357 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
 1358 for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) {
1359 ctx->prev_ref[i] = prev;
1360 n = ir_insn_len(insn);
1361 prev = i;
1362 i += n;
1363 insn += n;
1364 }
1365 ctx->prev_ref[i] = prev;
1366 }
1367}