php-internal-docs 8.4.8
Unofficial docs for php/php-src
ir_sccp.c
1/*
2 * IR - Lightweight JIT Compilation Framework
3 * (SCCP - Sparse Conditional Constant Propagation)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 *
7 * The SCCP algorithm is based on the M. N. Wegman and F. K. Zadeck publication.
8 * See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches"
9 * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991
10 */
11
12#include "ir.h"
13#include "ir_private.h"
14
15#define IR_COMBO_COPY_PROPAGATION 1
16
17#define IR_TOP IR_UNUSED
18#define IR_BOTTOM IR_LAST_OP
19
20#define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
21#define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)
22
23#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].op == IR_TOP)
24#define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].op == IR_BOTTOM)
25#define IR_IS_REACHABLE(ref) _ir_is_reachable_ctrl(ctx, _values, ref)
26#define IR_IS_CONST(ref) (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))
27
28IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref)
29{
30 IR_ASSERT(!IR_IS_CONST_REF(ref));
31 IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
32 return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
33}
34
35static void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
36{
37 ir_use_list *use_list;
38 ir_ref n, *p, use;
39
41 use_list = &ctx->use_lists[ref];
42 n = use_list->count;
43 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
44 use = *p;
45 if (_values[use].op != IR_BOTTOM) {
46 ir_bitqueue_add(worklist, use);
47 }
48 }
49}
50
51static void ir_sccp_add_input(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
52{
54 IR_ASSERT(_values[ref].op == IR_TOP);
55 /* do backward propagation only once */
56 if (!_values[ref].op1) {
57 _values[ref].op1 = 1;
58 ir_bitqueue_add(worklist, ref);
59 }
60}
61
62#if IR_COMBO_COPY_PROPAGATION
63static ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a)
64{
65 if (a > 0 && _values[a].op == IR_COPY) {
66 do {
67 a = _values[a].op1;
68 IR_ASSERT(a > 0);
69 } while (_values[a].op == IR_COPY);
70 IR_ASSERT(_values[a].op == IR_BOTTOM);
71 }
72 return a;
73}
74
75#if 0
76static void CHECK_LIST(ir_insn *_values, ir_ref ref)
77{
78 ir_ref member = _values[ref].op2;
79 while (member != ref) {
80 IR_ASSERT(_values[_values[member].op2].op3 == member);
81 member = _values[member].op2;
82 }
83 IR_ASSERT(_values[_values[ref].op2].op3 == ref);
84}
85#else
86# define CHECK_LIST(_values, ref)
87#endif
88
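/*
 * Combo copy propagation keeps all nodes currently known to share one value
 * in a circular doubly-linked list threaded through op2 (next) and op3
 * (prev). A COPY's op1 points towards the partition leader (a BOTTOM node);
 * ir_sccp_identity() chases op1 until it reaches that leader.
 */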
89static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst)
90{
91 IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
92 IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
93 IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);
94
95 _values[dst].optx = IR_COPY;
96 _values[dst].op1 = src;
97
98 if (_values[src].op == IR_BOTTOM) {
99 /* initialize empty double-linked list */
100 if (_values[src].op1 != src) {
101 _values[src].op1 = src;
102 _values[src].op2 = src;
103 _values[src].op3 = src;
104 }
105 } else {
106 src = ir_sccp_identity(ctx, _values, src);
107 }
108
109 /* insert into circular double-linked list */
110 ir_ref prev = _values[src].op3;
111 _values[dst].op2 = src;
112 _values[dst].op3 = prev;
113 _values[src].op3 = dst;
114 _values[prev].op2 = dst;
115 CHECK_LIST(_values, dst);
116}
117
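/*
 * Splitting a partition: when "ref" turns out to differ from its former
 * identity, it becomes the leader of a new partition. Every member whose
 * identity chain passes through "ref" is moved into the new list, and the
 * moved members together with their uses are re-queued for re-evaluation.
 */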
118static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
119{
120 ir_ref member, head, tail, next, prev;
121
122 CHECK_LIST(_values, ref);
123 IR_MAKE_BOTTOM(ref);
124 _values[ref].op1 = ref;
125
126 member = _values[ref].op2;
127 head = tail = IR_UNUSED;
128 while (member != ref) {
129 if (_values[member].op != IR_BOTTOM) {
130 ir_bitqueue_add(worklist, member);
131 }
132 ir_sccp_add_uses(ctx, _values, worklist, member);
133
134 next = _values[member].op2;
135 if (ir_sccp_identity(ctx, _values, member) == ref) {
136 /* remove "member" from the old circular double-linked list */
137 prev = _values[member].op3;
138 _values[prev].op2 = next;
139 _values[next].op3 = prev;
140
141 /* insert "member" into the new double-linked list */
142 if (!head) {
143 head = tail = member;
144 } else {
145 _values[tail].op2 = member;
146 _values[member].op3 = tail;
147 tail = member;
148 }
149 }
150 member = next;
151 }
152
153 /* remove "ref" from the old circular double-linked list */
154 next = _values[ref].op2;
155 prev = _values[ref].op3;
156 _values[prev].op2 = next;
157 _values[next].op3 = prev;
158 CHECK_LIST(_values, next);
159
160 /* close the new circle */
161 if (head) {
162 _values[ref].op2 = head;
163 _values[ref].op3 = tail;
164 _values[tail].op2 = ref;
165 _values[head].op3 = ref;
166 } else {
167 _values[ref].op2 = ref;
168 _values[ref].op3 = ref;
169 }
170 CHECK_LIST(_values, ref);
171}
172
173static void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
174{
175 if (_values[ref].op == IR_COPY) {
176 ir_sccp_split_partition(ctx, _values, worklist, ref);
177 } else {
178 IR_MAKE_BOTTOM(ref);
179 }
180}
181
182# define IR_MAKE_BOTTOM_EX(ref) ir_sccp_make_bottom_ex(ctx, _values, worklist, ref)
183#else
184# define ir_sccp_identity(_ctx, _values, ref) (ref)
185# define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
186#endif
187
188IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *val_insn)
189{
190 IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));
191
192 if (_values[ref].op == IR_TOP) {
193 /* TOP meet NEW_CONST => NEW_CONST */
194 _values[ref].optx = val_insn->opt;
195 _values[ref].val.u64 = val_insn->val.u64;
196 return 1;
197 } else if (_values[ref].opt == val_insn->opt) {
198 /* OLD_CONST meet NEW_CONST => (OLD_CONST == NEW_CONST) ? OLD_CONST : BOTTOM */
199 if (_values[ref].val.u64 == val_insn->val.u64) {
200 return 0;
201 }
202 }
203
204 IR_MAKE_BOTTOM_EX(ref);
205 return 1;
206}
207
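/*
 * The lattice "meet" of the current value of "ref" with the value of "val":
 * TOP meet X gives X, equal constants stay unchanged, different constants
 * fall to BOTTOM, and two copies are merged through their identities.
 * Returns 1 if the lattice value of "ref" changed.
 */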
208static bool ir_sccp_meet(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
209{
210 ir_ref val_identity = ir_sccp_identity(ctx, _values, val);
211 ir_insn *val_insn;
212
213 if (IR_IS_CONST_REF(val_identity)) {
214 val_insn = &ctx->ir_base[val_identity];
215 } else {
216 val_insn = &_values[val_identity];
217
218 if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
219#if IR_COMBO_COPY_PROPAGATION
220 if (_values[ref].op == IR_COPY) {
221 /* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
222 * (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */
223 if (ir_sccp_identity(ctx, _values, ref) == val_identity) {
224 return 0; /* not changed */
225 }
226 ir_sccp_split_partition(ctx, _values, worklist, ref);
227 return 1;
228 } else {
229 IR_ASSERT(_values[ref].op != IR_BOTTOM);
230 /* TOP meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
231 /* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
232 ir_sccp_add_identity(ctx, _values, val, ref);
233 return 1;
234 }
235#endif
236
237 IR_MAKE_BOTTOM(ref);
238 return 1;
239 }
240 }
241
242 return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
243}
244
245static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *insn)
246{
247 ir_insn *op1_insn, *op2_insn, *op3_insn;
248 ir_ref op1, op2, op3, copy;
249 uint32_t opt = insn->opt;
250
251 op1 = ir_sccp_identity(ctx, _values, insn->op1);
252 op2 = ir_sccp_identity(ctx, _values, insn->op2);
253 op3 = ir_sccp_identity(ctx, _values, insn->op3);
254
255restart:
256 op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
257 op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
258 op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;
259
260 switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
261 case IR_FOLD_DO_RESTART:
262 opt = ctx->fold_insn.optx;
263 op1 = ctx->fold_insn.op1;
264 op2 = ctx->fold_insn.op2;
265 op3 = ctx->fold_insn.op3;
266 goto restart;
267 case IR_FOLD_DO_CSE:
268 case IR_FOLD_DO_EMIT:
269 IR_MAKE_BOTTOM_EX(ref);
270 return 1;
271 case IR_FOLD_DO_COPY:
272 copy = ctx->fold_insn.op1;
273 return ir_sccp_meet(ctx, _values, worklist, ref, copy);
274 case IR_FOLD_DO_CONST:
275 return ir_sccp_meet_const(ctx, _values, worklist, ref, &ctx->fold_insn);
276 default:
277 IR_ASSERT(0);
278 return 0;
279 }
280}
281
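/*
 * A PHI is evaluated only over the inputs whose corresponding MERGE inputs
 * are reachable; unreachable and self-referencing inputs are ignored. The
 * result is a constant if all live inputs agree on a single constant, a
 * copy if they agree on a single non-constant value, and BOTTOM otherwise.
 */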
282static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref i, ir_insn *insn)
283{
284 ir_ref j, n, input, *merge_input, *p;
285 ir_insn *v, *new_const = NULL;
286#if IR_COMBO_COPY_PROPAGATION
287 ir_ref new_copy = IR_UNUSED;
288 ir_ref new_copy_identity = IR_UNUSED;
289 ir_ref phi_identity = ir_sccp_identity(ctx, _values, i);
290#endif
291
292 if (!IR_IS_REACHABLE(insn->op1)) {
293 return 0;
294 }
295 n = insn->inputs_count;
296 if (n > 3 && _values[i].op == IR_TOP) {
297 for (j = 0; j < (n>>2); j++) {
298 _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
299 }
300 }
301
302 p = insn->ops + 2;
303 merge_input = ctx->ir_base[insn->op1].ops + 1;
304 for (; --n > 0; p++, merge_input++) {
305 IR_ASSERT(*merge_input > 0);
306 if (!IR_IS_REACHABLE(*merge_input)) {
307 continue;
308 }
309
310 input = *p;
311 if (IR_IS_CONST_REF(input)) {
312 v = &ctx->ir_base[input];
313 } else if (input == i) {
314 continue;
315 } else {
316 v = &_values[input];
317 if (v->op == IR_TOP) {
318 ir_sccp_add_input(ctx, _values, worklist, input);
319 continue;
320#if IR_COMBO_COPY_PROPAGATION
321 } else if (v->op == IR_COPY) {
322 input = v->op1;
323 new_copy_identity = ir_sccp_identity(ctx, _values, input);
324 if (new_copy_identity == phi_identity) {
325 new_copy_identity = IR_UNUSED;
326 continue;
327 }
328 new_copy = input;
329 goto next;
330#endif
331 } else if (v->op == IR_BOTTOM) {
332#if IR_COMBO_COPY_PROPAGATION
333 if (input == phi_identity) {
334 continue;
335 }
336 new_copy = new_copy_identity = input;
337 goto next;
338#else
339 goto make_bottom;
340#endif
341 }
342 }
343 new_const = v;
344 goto next;
345 }
346
347 return 0;
348
349next:
350 p++;
351 merge_input++;
352 /* for all live merge inputs */
353 for (; --n > 0; p++, merge_input++) {
354 IR_ASSERT(*merge_input > 0);
355 if (!IR_IS_REACHABLE(*merge_input)) {
356 continue;
357 }
358
359 input = *p;
360 if (IR_IS_CONST_REF(input)) {
361#if IR_COMBO_COPY_PROPAGATION
362 if (new_copy) {
363 goto make_bottom;
364 }
365#endif
366 v = &ctx->ir_base[input];
367 } else if (input == i) {
368 continue;
369 } else {
370 v = &_values[input];
371 if (v->op == IR_TOP) {
372 ir_sccp_add_input(ctx, _values, worklist, input);
373 continue;
374#if IR_COMBO_COPY_PROPAGATION
375 } else if (v->op == IR_COPY) {
376 ir_ref identity = ir_sccp_identity(ctx, _values, v->op1);
377
378 if (identity == phi_identity || identity == new_copy_identity) {
379 continue;
380 }
381 goto make_bottom;
382#endif
383 } else if (v->op == IR_BOTTOM) {
384#if IR_COMBO_COPY_PROPAGATION
385 if (input == phi_identity || input == new_copy_identity) {
386 continue;
387 }
388#endif
389 goto make_bottom;
390 }
391 }
392 if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) {
393 goto make_bottom;
394 }
395 }
396
397#if IR_COMBO_COPY_PROPAGATION
398 if (new_copy) {
399 return ir_sccp_meet(ctx, _values, worklist, i, new_copy);
400 }
401#endif
402
403 return ir_sccp_meet_const(ctx, _values, worklist, i, new_const);
404
405make_bottom:
406 IR_MAKE_BOTTOM_EX(i);
407 return 1;
408}
409
410static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn)
411{
412 if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
413 return ctx->use_lists[ref].count == 1;
414 } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
415 return ctx->use_lists[ref].count == 1;
416 }
417 return 0;
418}
419
420static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
421{
422 if (ctx->use_lists[ref].count == 1) {
423 uint32_t flags = ir_op_flags[ctx->ir_base[ref].op];
424
425 if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
426 return 1;
427 } else if (ctx->ir_base[ref].op == IR_ALLOCA) {
428 return 1;
429 }
430 }
431 return 0;
432}
433
434static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
435{
436 if (ctx->use_lists[ref].count == 0) {
437 return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
438 } else {
439 return ir_is_dead_load(ctx, ref);
440 }
441 return 0;
442}
443
444static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
445{
446 ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
447
448 return ir_const_is_true(v);
449}
450
451static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
452{
453 ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
454 ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
455
456 IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
457 IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
458 return v1->val.u64 == v2->val.u64;
459}
460
461#ifdef IR_SCCP_TRACE
462static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
463{
464 if (IR_IS_BOTTOM(i)) {
465 fprintf(stderr, "BOTTOM");
466 } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
467 fprintf(stderr, "CONST(");
468 ir_print_const(ctx, &_values[i], stderr, true);
469 fprintf(stderr, ")");
470#if IR_COMBO_COPY_PROPAGATION
471 } else if (_values[i].op == IR_COPY) {
472 fprintf(stderr, "COPY(%d)", _values[i].op1);
473#endif
474 } else if (IR_IS_TOP(i)) {
475 fprintf(stderr, "TOP");
476 } else if (_values[i].op == IR_IF) {
477 fprintf(stderr, "IF(%d)", _values[i].op1);
478 } else if (_values[i].op == IR_MERGE) {
479 fprintf(stderr, "MERGE(%d)", _values[i].op1);
480 } else {
481 fprintf(stderr, "%d", _values[i].op);
482 }
483}
484
485static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i)
486{
487 fprintf(stderr, "%d. ", i);
488 ir_sccp_trace_val(ctx, _values, i);
489}
490
491static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
492{
493 fprintf(stderr, " -> ");
494 ir_sccp_trace_val(ctx, _values, i);
495 fprintf(stderr, "\n");
496}
497#else
498# define ir_sccp_trace_start(c, v, i)
499# define ir_sccp_trace_end(c, v, i)
500#endif
501
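/*
 * Main analysis loop. Starting from the entry node (ref 1), instructions
 * are popped from the worklist and their lattice values are re-evaluated;
 * whenever a value changes, all users are re-queued via ir_sccp_add_uses().
 * Reachability of control nodes is tracked in the same pass, so a constant
 * IF/SWITCH condition keeps the untaken successors unreachable.
 */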
502static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
503{
504 ir_ref i, j, n, *p, use;
505 ir_use_list *use_list;
506 ir_insn *insn, *use_insn;
507 uint32_t flags;
508
509 /* A slightly modified version of the SCCP algorithm of M. N. Wegman and F. K. Zadeck */
510 worklist->pos = 0;
511 ir_bitset_incl(worklist->set, 1);
512 for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) {
513 IR_ASSERT(_values[i].op != IR_BOTTOM);
514 ir_sccp_trace_start(ctx, _values, i);
515 insn = &ctx->ir_base[i];
516 flags = ir_op_flags[insn->op];
517 if (flags & IR_OP_FLAG_DATA) {
518 if (ctx->use_lists[i].count == 0) {
519 /* dead code */
520 continue;
521 } else if (insn->op == IR_PHI) {
522 if (!ir_sccp_analyze_phi(ctx, _values, worklist, i, insn)) {
523 continue;
524 }
525 } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
526 bool may_benefit = 0;
527 bool has_top = 0;
528
529 if (_values[i].op != IR_TOP) {
530 may_benefit = 1;
531 }
532
533 IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
534 n = IR_INPUT_EDGES_COUNT(flags);
535 for (p = insn->ops + 1; n > 0; p++, n--) {
536 ir_ref input = *p;
537 if (input > 0) {
538 if (_values[input].op == IR_TOP) {
539 has_top = 1;
540 ir_sccp_add_input(ctx, _values, worklist, input);
541 } else if (_values[input].op != IR_BOTTOM) {
542 /* Perform folding only if some of the direct inputs
543 * are going to be replaced by a constant or copy.
544 * This approach may miss some folding optimizations
545 * that depend on indirect inputs, e.g. reassociation.
546 */
547 may_benefit = 1;
548 }
549 }
550 }
551 if (has_top) {
552 continue;
553 }
554 if (!may_benefit) {
555 IR_MAKE_BOTTOM_EX(i);
556 if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
557 ir_bitqueue_add(iter_worklist, i);
558 }
559 } else if (!ir_sccp_fold(ctx, _values, worklist, i, insn)) {
560 /* not changed */
561 continue;
562 } else if (_values[i].op == IR_BOTTOM) {
563 insn = &ctx->ir_base[i];
564 if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
565 ir_bitqueue_add(iter_worklist, i);
566 }
567 }
568 } else {
569 IR_MAKE_BOTTOM_EX(i);
570 }
571 } else if (flags & IR_OP_FLAG_BB_START) {
572 if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_BEGIN) {
573 ir_bitqueue_add(iter_worklist, i);
574 }
575 if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
576 ir_ref unfeasible_inputs = 0;
577
578 n = insn->inputs_count;
579 if (n > 3 && _values[i].op == IR_TOP) {
580 for (j = 0; j < (n>>2); j++) {
581 _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
582 }
583 }
584 for (p = insn->ops + 1; n > 0; p++, n--) {
585 ir_ref input = *p;
586 IR_ASSERT(input > 0);
587 if (!IR_IS_REACHABLE(input)) {
588 unfeasible_inputs++;
589 }
590 }
591 if (unfeasible_inputs == 0) {
592 IR_MAKE_BOTTOM(i);
593 } else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) {
594 _values[i].optx = IR_MERGE;
595 _values[i].op1 = unfeasible_inputs;
596 } else {
597 continue;
598 }
599 if (ctx->flags2 & IR_MEM2SSA_VARS) {
600 /* MEM2SSA puts new PHIs at the bottom, but we'd like to process them now */
601 use_list = &ctx->use_lists[i];
602 n = use_list->count;
603 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
604 use = *p;
605 if (_values[use].op != IR_BOTTOM) {
606 if (ctx->ir_base[use].op == IR_PHI) {
607 ir_bitqueue_del(worklist, use);
608 if (ctx->use_lists[use].count != 0) {
609 if (ir_sccp_analyze_phi(ctx, _values, worklist, use, &ctx->ir_base[use])) {
610 ir_sccp_add_uses(ctx, _values, worklist, use);
611 }
612 }
613 } else {
614 ir_bitqueue_add(worklist, use);
615 }
616 }
617 }
618 continue;
619 }
620 } else {
621 IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1));
622 IR_MAKE_BOTTOM(i);
623 }
624 } else {
625 IR_ASSERT(insn->op1 > 0);
626 if (!IR_IS_REACHABLE(insn->op1)) {
627 /* control input is not feasible */
628 continue;
629 }
630 if (insn->op == IR_IF) {
631 if (IR_IS_TOP(insn->op2)) {
632 ir_sccp_add_input(ctx, _values, worklist, insn->op2);
633 continue;
634 }
635 if (IR_IS_CONST(insn->op2)) {
636 bool b = ir_sccp_is_true(ctx, _values, insn->op2);
637 use_list = &ctx->use_lists[i];
638 IR_ASSERT(use_list->count == 2);
639 p = &ctx->use_edges[use_list->refs];
640 use = *p;
641 use_insn = &ctx->ir_base[use];
642 IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
643 if ((use_insn->op == IR_IF_TRUE) != b) {
644 use = *(p+1);
645 IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
646 }
647 if (_values[i].op == IR_TOP) {
648 _values[i].optx = IR_IF;
649 _values[i].op1 = use;
650 ir_bitqueue_add(worklist, use);
651 continue;
652 } else if (_values[i].op == IR_IF && _values[i].op1 == use) {
653 continue;
654 }
655 }
656 IR_MAKE_BOTTOM(i);
657 ir_bitqueue_add(iter_worklist, i);
658 } else if (insn->op == IR_SWITCH) {
659 if (IR_IS_TOP(insn->op2)) {
660 ir_sccp_add_input(ctx, _values, worklist, insn->op2);
661 continue;
662 }
663 if (IR_IS_CONST(insn->op2)) {
664 ir_ref use_case = IR_UNUSED;
665
666 use_list = &ctx->use_lists[i];
667 n = use_list->count;
668 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
669 use = *p;
670 IR_ASSERT(use > 0);
671 use_insn = &ctx->ir_base[use];
672 if (use_insn->op == IR_CASE_VAL) {
673 if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
674 use_case = use;
675 break;
676 }
677 } else if (use_insn->op == IR_CASE_DEFAULT) {
678 use_case = use;
679 }
680 }
681 if (use_case) {
682 use_insn = &ctx->ir_base[use_case];
683 if (_values[i].op == IR_TOP) {
684 _values[i].optx = IR_IF;
685 _values[i].op1 = use_case;
686 ir_bitqueue_add(worklist, use_case);
687 continue;
688 } else if (_values[i].op == IR_IF && _values[i].op1 == use_case) {
689 continue;
690 }
691 }
692 }
693 IR_MAKE_BOTTOM(i);
694 } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) {
695 /* schedule dead load elimination */
696 ir_bitqueue_add(iter_worklist, i);
697 IR_MAKE_BOTTOM(i);
698 } else {
699 if (_values[i].op == IR_TOP) {
700 bool has_top = 0;
701
702 /* control, call, load and store instructions may have unprocessed inputs */
703 n = IR_INPUT_EDGES_COUNT(flags);
704 if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
705 for (j = 0; j < (n>>2); j++) {
706 _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
707 }
708 for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
710 use = *p;
711 if (use > 0 && _values[use].op == IR_TOP) {
712 has_top = 1;
713 ir_sccp_add_input(ctx, _values, worklist, use);
714 }
715 }
716 } else if (n >= 2) {
718 use = insn->op2;
719 if (use > 0 && _values[use].op == IR_TOP) {
720 has_top = 1;
721 ir_sccp_add_input(ctx, _values, worklist, use);
722 }
723 if (n > 2) {
724 IR_ASSERT(n == 3);
726 use = insn->op3;
727 if (use > 0 && _values[use].op == IR_TOP) {
728 has_top = 1;
729 ir_sccp_add_input(ctx, _values, worklist, use);
730 }
731 }
732 }
733
734 if (has_top && !(flags & IR_OP_FLAG_BB_END)) {
735 use = ir_next_control(ctx, i);
736 if (_values[use].op == IR_TOP) {
737 has_top = 1;
738 /* do forward control propagation only once */
739 if (!_values[use].op1) {
740 _values[use].op1 = 1;
741 ir_bitqueue_add(worklist, use);
742 }
743 }
744 continue;
745 }
746 }
747 IR_MAKE_BOTTOM(i);
748 }
749 }
750 ir_sccp_add_uses(ctx, _values, worklist, i);
751 }
752
753#ifdef IR_DEBUG
754 if (ctx->flags & IR_DEBUG_SCCP) {
755 for (i = 1; i < ctx->insns_count; i++) {
756 if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
757 fprintf(stderr, "%d. CONST(", i);
758 ir_print_const(ctx, &_values[i], stderr, true);
759 fprintf(stderr, ")\n");
760#if IR_COMBO_COPY_PROPAGATION
761 } else if (_values[i].op == IR_COPY) {
762 fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
763#endif
764 } else if (IR_IS_TOP(i)) {
765 fprintf(stderr, "%d. TOP\n", i);
766 } else if (_values[i].op == IR_IF) {
767 fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
768 } else if (_values[i].op == IR_MERGE) {
769 fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
770 } else if (!IR_IS_BOTTOM(i)) {
771 fprintf(stderr, "%d. %d\n", i, _values[i].op);
772 }
773 }
774 }
775#endif
776}
777
778/***********************/
779/* SCCP transformation */
780/***********************/
781
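/*
 * The transformation pass applies the analysis results to the IR: nodes
 * with a constant lattice value are replaced by constants, COPY nodes by
 * their identities, unreachable (TOP) nodes are removed, one-way IF/SWITCH
 * nodes are collapsed, and MERGEs with unfeasible inputs are compacted
 * (see ir_sccp_transform() below).
 */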
782static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
783{
784 ir_ref j, n, *p;
785 ir_insn *insn;
786
787 CLEAR_USES(ref);
788 insn = &ctx->ir_base[ref];
789 n = insn->inputs_count;
790 insn->opt = IR_NOP; /* keep "inputs_count" */
791 for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
792 *p = IR_UNUSED;
793 }
794}
795
796static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
797{
798 ir_ref j, n, *p;
799 ir_insn *insn;
800
801 CLEAR_USES(ref);
802 insn = &ctx->ir_base[ref];
803 n = insn->inputs_count;
804 insn->opt = IR_NOP; /* keep "inputs_count" */
805 for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
806 ir_ref input = *p;
807 *p = IR_UNUSED;
808 /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
809 if (input > 0 && _values[input].op > IR_COPY) {
810 ir_use_list_remove_all(ctx, input, ref);
811 if (ir_is_dead(ctx, input)) {
812 /* schedule DCE */
813 ir_bitqueue_add(worklist, input);
814 }
815 }
816 }
817}
818
819static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
820{
821 ir_ref j, n, *p, use, i;
822 ir_insn *insn;
823 ir_use_list *use_list;
824
825 IR_ASSERT(ref != new_ref);
826
827 insn = &ctx->ir_base[ref];
828
829#if IR_COMBO_COPY_PROPAGATION
830 if ((ir_op_flags[insn->op] & IR_OP_FLAG_MEM) && IR_IS_REACHABLE(insn->op1)) {
831 /* remove from control list */
832 ir_ref prev = insn->op1;
833 ir_ref next = ir_next_control(ctx, ref);
834 ctx->ir_base[next].op1 = prev;
835 ir_use_list_remove_one(ctx, ref, next);
836 ir_use_list_replace_one(ctx, prev, ref, next);
837 insn->op1 = IR_UNUSED;
838 }
839#endif
840
841 n = insn->inputs_count;
842 insn->opt = IR_NOP; /* keep "inputs_count" */
843 for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
844 ir_ref input = *p;
845 *p = IR_UNUSED;
846 /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
847 if (input > 0 && _values[input].op > IR_COPY) {
848 ir_use_list_remove_all(ctx, input, ref);
849 if (ir_is_dead(ctx, input)) {
850 /* schedule DCE */
851 ir_bitqueue_add(worklist, input);
852 }
853 }
854 }
855
856 use_list = &ctx->use_lists[ref];
857 n = use_list->count;
858 p = &ctx->use_edges[use_list->refs];
859 if (new_ref <= 0) {
860 /* constant or IR_UNUSED */
861 for (; n; p++, n--) {
862 use = *p;
863 /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
864 if (_values[use].op > IR_COPY) {
865 insn = &ctx->ir_base[use];
866 i = ir_insn_find_op(insn, ref);
867 if (!i) continue;
868 IR_ASSERT(i > 0);
869 ir_insn_set_op(insn, i, new_ref);
870 /* schedule folding */
871 ir_bitqueue_add(worklist, use);
872 }
873 }
874 } else {
875 for (j = 0; j < n; j++, p++) {
876 use = *p;
877 /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
878 if (_values[use].op == IR_BOTTOM) {
879 insn = &ctx->ir_base[use];
880 i = ir_insn_find_op(insn, ref);
881 IR_ASSERT(i > 0);
882 ir_insn_set_op(insn, i, new_ref);
883 if (ir_use_list_add(ctx, new_ref, use)) {
884 /* restore after reallocation */
885 use_list = &ctx->use_lists[ref];
886 n = use_list->count;
887 p = &ctx->use_edges[use_list->refs + j];
888 }
889 /* schedule folding */
890 ir_bitqueue_add(worklist, use);
891 }
892 }
893 }
894 CLEAR_USES(ref);
895}
896
897static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
898{
899 ir_ref next;
900 ir_insn *insn, *next_insn;
901
902 insn = &ctx->ir_base[ref];
903 if (ctx->use_lists[dst].count == 1) {
904 next = ctx->use_edges[ctx->use_lists[dst].refs];
905 next_insn = &ctx->ir_base[next];
906 /* remove IF and IF_TRUE/FALSE from the double-linked control list */
907 next_insn->op1 = insn->op1;
908 ir_use_list_replace_one(ctx, insn->op1, ref, next);
909 /* remove IF and IF_TRUE/FALSE instructions */
910 ir_sccp_make_nop(ctx, ref);
911 ir_sccp_make_nop(ctx, dst);
912 } else {
913 insn->op2 = IR_UNUSED;
914 insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
915 next_insn = &ctx->ir_base[dst];
916 next_insn->op = IR_BEGIN;
917 }
918}
919
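/*
 * Compacts a MERGE in place: feasible inputs are shifted down into the
 * first slots and the PHIs using the MERGE are compressed accordingly.
 * A MERGE left with a single input degenerates into BEGIN and its PHIs
 * into COPYs.
 */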
920static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
921{
922 ir_ref old_merge_inputs, new_merge_inputs, i, *p;
923 ir_use_list *use_list;
924 ir_bitset life_inputs;
925 ir_bitset_base_t holder = 0;
926
927 IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
928 old_merge_inputs = insn->inputs_count;
929 new_merge_inputs = 0;
930 life_inputs = (old_merge_inputs < IR_BITSET_BITS) ? &holder : ir_bitset_malloc(old_merge_inputs + 1);
931
932 for (i = 1; i <= old_merge_inputs; i++) {
933 ir_ref input = ir_insn_op(insn, i);
934
935 if (input) {
936 new_merge_inputs++;
937 if (new_merge_inputs != i) {
938 ir_insn_set_op(insn, new_merge_inputs, input);
939 }
940 ir_bitset_incl(life_inputs, i);
941 }
942 }
943
944 if (new_merge_inputs == old_merge_inputs) {
945 /* All inputs are feasible */
946 if (life_inputs != &holder) {
947 ir_mem_free(life_inputs);
948 }
949 return 0;
950 }
951
952 for (i = new_merge_inputs + 1; i <= old_merge_inputs; i++) {
953 ir_insn_set_op(insn, i, IR_UNUSED);
954 }
955
956 if (new_merge_inputs <= 1) {
957#if 0
958 if (new_merge_inputs == 1
959 && insn->op == IR_LOOP_BEGIN
960 && insn->op1 > ref) { // TODO: check dominance instead of order
961 /* dead loop */
962 ir_use_list_remove_one(ctx, insn->op1, ref);
963 insn->op1 = IR_UNUSED;
964 new_merge_inputs = 0;
965 }
966#endif
967 insn->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
968 ir_bitqueue_add(worklist, ref);
969 } else {
970 insn->inputs_count = new_merge_inputs;
971 }
972
973 /* Update PHIs */
974 use_list = &ctx->use_lists[ref];
975 if (use_list->count > 1) {
976 ir_ref use_count = 0;
977 ir_ref *q;
978
979 for (i = 0, p = q = &ctx->use_edges[use_list->refs]; i < use_list->count; p++, i++) {
980 ir_ref use = *p;
981 ir_insn *use_insn = &ctx->ir_base[use];
982
983 if (use_insn->op == IR_PHI) {
984 ir_ref j, k;
985
986 /* compress PHI */
987 for (j = k = 1; j <= old_merge_inputs; j++) {
988 ir_ref input = ir_insn_op(use_insn, j + 1);
989
990 if (ir_bitset_in(life_inputs, j)) {
991 IR_ASSERT(input);
992 if (k != j) {
993 ir_insn_set_op(use_insn, k + 1, input);
994 }
995 k++;
996 } else if (input > 0) {
997 ir_use_list_remove_one(ctx, input, use);
998 }
999 }
1000 while (k <= old_merge_inputs) {
1001 k++;
1002 ir_insn_set_op(use_insn, k, IR_UNUSED);
1003 }
1004
1005 if (new_merge_inputs == 0) {
1006 /* remove PHI */
1007#if 0
1008 use_insn->op1 = IR_UNUSED;
1009 ir_iter_remove_insn(ctx, use, worklist);
1010#else
1011 IR_ASSERT(0);
1012#endif
1013 continue;
1014 } else if (new_merge_inputs == 1) {
1015 /* replace PHI by COPY */
1016 use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1);
1017 use_insn->op1 = use_insn->op2;
1018 use_insn->op2 = IR_UNUSED;
1019 ir_bitqueue_add(worklist, use);
1020 continue;
1021 } else {
1022 use_insn->inputs_count = new_merge_inputs + 1;
1023 }
1024 }
1025 if (p != q) {
1026 *q = use;
1027 }
1028 q++;
1029 use_count++;
1030 }
1031 for (i = use_count; i < use_list->count; q++, i++) {
1032 *q = IR_UNUSED;
1033 }
1034 use_list->count = use_count;
1035 }
1036
1037 if (life_inputs != &holder) {
1038 ir_mem_free(life_inputs);
1039 }
1040
1041 return 1;
1042}
1043
1044static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
1045{
1046 ir_ref i, j;
1047 ir_insn *value;
1048
1049 for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
1050 if (value->op == IR_BOTTOM) {
1051 continue;
1052 } else if (IR_IS_CONST_OP(value->op)) {
1053 /* replace instruction by constant */
1054 j = ir_const(ctx, value->val, value->type);
1055 ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
1056 } else if (IR_IS_SYM_CONST(value->op)) {
1057 /* replace instruction by constant */
1058 j = ir_const_ex(ctx, value->val, value->type, value->optx);
1059 ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
1060#if IR_COMBO_COPY_PROPAGATION
1061 } else if (value->op == IR_COPY) {
1062 ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->op1), iter_worklist);
1063#endif
1064 } else if (value->op == IR_TOP) {
1065 /* remove unreachable instruction */
1066 ir_insn *insn = &ctx->ir_base[i];
1067
1068 if (insn->op == IR_NOP) {
1069 /* already removed */
1070 } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
1071 if (insn->op != IR_PARAM) {
1072 ir_sccp_remove_insn(ctx, _values, i, iter_worklist);
1073 }
1074 } else {
1075 if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) {
1076 /* remove from terminators list */
1077 ir_ref prev = ctx->ir_base[1].op1;
1078 if (prev == i) {
1079 ctx->ir_base[1].op1 = insn->op3;
1080 } else {
1081 while (prev) {
1082 if (ctx->ir_base[prev].op3 == i) {
1083 ctx->ir_base[prev].op3 = insn->op3;
1084 break;
1085 }
1086 prev = ctx->ir_base[prev].op3;
1087 }
1088 }
1089 }
1090 ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist);
1091 }
1092 } else if (value->op == IR_IF) {
1093 /* remove one way IF/SWITCH */
1094 ir_sccp_remove_if(ctx, _values, i, value->op1);
1095 } else if (value->op == IR_MERGE) {
1096 /* schedule merge to remove unfeasible MERGE inputs */
1097 ir_bitqueue_add(worklist, i);
1098 }
1099 }
1100
1101 while ((i = ir_bitqueue_pop(worklist)) >= 0) {
1102 IR_ASSERT(_values[i].op == IR_MERGE);
1103 ir_sccp_remove_unfeasible_merge_inputs(ctx, i, &ctx->ir_base[i], iter_worklist);
1104 }
1105}
1106
1107/***************************/
1108/* Iterative Optimizations */
1109/***************************/
1110
1111/* Modification of some instruction may open new optimization opportunities for other
1112 * instructions that use this one.
1113 *
1114 * For example, let "a = ADD(x, y)" become "a = ADD(x, C1)". In case we also have
1115 * "b = ADD(a, C2)", we may optimize it into "b = ADD(x, C1 + C2)" and then might
1116 * also remove "a".
1117 *
1118 * This implementation supports only a few optimizations of combinations from ir_fold.h
1119 *
1120 * TODO: Think about a more general solution ???
1121 */
1122static void ir_iter_add_related_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1123{
1124 ir_insn *insn = &ctx->ir_base[ref];
1125
1126 if (insn->op == IR_ADD || insn->op == IR_SUB) {
1127 ir_use_list *use_list = &ctx->use_lists[ref];
1128
1129 if (use_list->count == 1) {
1130 ir_ref use = ctx->use_edges[use_list->refs];
1131 ir_insn *use_insn = &ctx->ir_base[use];
1132
1133 if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) {
1134 ir_bitqueue_add(worklist, use);
1135 }
1136 }
1137 }
1138}
1139
1140static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1141{
1142 ir_ref j, n, *p;
1143 ir_insn *insn;
1144
1145 CLEAR_USES(ref);
1146 insn = &ctx->ir_base[ref];
1147 n = insn->inputs_count;
1148 insn->opt = IR_NOP; /* keep "inputs_count" */
1149 for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
1150 ir_ref input = *p;
1151 *p = IR_UNUSED;
1152 if (input > 0) {
1153 ir_use_list_remove_all(ctx, input, ref);
1154 if (ir_is_dead(ctx, input)) {
1155 /* schedule DCE */
1156 ir_bitqueue_add(worklist, input);
1157 } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
1158 /* try to optimize PHI into ABS/MIN/MAX/COND */
1159 ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
1160 }
1161 }
1162 }
1163}
1164
1165void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
1166{
1167 ir_ref i, j, n, *p, use;
1168 ir_insn *insn;
1169 ir_use_list *use_list;
1170
1171 IR_ASSERT(ref != new_ref);
1172
1173 use_list = &ctx->use_lists[ref];
1174 n = use_list->count;
1175 p = &ctx->use_edges[use_list->refs];
1176 if (new_ref <= 0) {
1177 /* constant or IR_UNUSED */
1178 for (; n; p++, n--) {
1179 use = *p;
1180 IR_ASSERT(use != ref);
1181 insn = &ctx->ir_base[use];
1182 i = ir_insn_find_op(insn, ref);
1183 IR_ASSERT(i > 0);
1184 ir_insn_set_op(insn, i, new_ref);
1185 /* schedule folding */
1186 ir_bitqueue_add(worklist, use);
1187 ir_iter_add_related_uses(ctx, use, worklist);
1188 }
1189 } else {
1190 for (j = 0; j < n; j++, p++) {
1191 use = *p;
1192 IR_ASSERT(use != ref);
1193 insn = &ctx->ir_base[use];
1194 i = ir_insn_find_op(insn, ref);
1195 IR_ASSERT(i > 0);
1196 ir_insn_set_op(insn, i, new_ref);
1197 if (ir_use_list_add(ctx, new_ref, use)) {
1198 /* restore after reallocation */
1199 use_list = &ctx->use_lists[ref];
1200 n = use_list->count;
1201 p = &ctx->use_edges[use_list->refs + j];
1202 }
1203 /* schedule folding */
1204 ir_bitqueue_add(worklist, use);
1205 }
1206 }
1207}
1208
1209static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
1210{
1211 ir_ref j, n, *p;
1212 ir_insn *insn;
1213
1214 insn = &ctx->ir_base[ref];
1215 n = insn->inputs_count;
1216 insn->opt = IR_NOP; /* keep "inputs_count" */
1217 for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
1218 ir_ref input = *p;
1219 *p = IR_UNUSED;
1220 if (input > 0) {
1221 ir_use_list_remove_all(ctx, input, ref);
1222 if (ir_is_dead(ctx, input)) {
1223 /* schedule DCE */
1224 ir_bitqueue_add(worklist, input);
1225 } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
1226 /* try to optimize PHI into ABS/MIN/MAX/COND */
1227 ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
1228 }
1229 }
1230 }
1231
1232 ir_iter_replace(ctx, ref, new_ref, worklist);
1233
1234 CLEAR_USES(ref);
1235}
1236
1237void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist)
1238{
1239 ir_insn *insn = &ctx->ir_base[ref];
1240 ir_ref old_val = ir_insn_op(insn, idx);
1241
1242 IR_ASSERT(old_val != new_val);
1243 if (!IR_IS_CONST_REF(new_val)) {
1244 ir_use_list_add(ctx, new_val, ref);
1245 }
1246 ir_insn_set_op(insn, idx, new_val);
1247 if (!IR_IS_CONST_REF(old_val)) {
1248 ir_use_list_remove_one(ctx, old_val, ref);
1249 if (ir_is_dead(ctx, old_val)) {
1250 /* schedule DCE */
1251 ir_bitqueue_add(worklist, old_val);
1252 }
1253 }
1254}
1255
1256static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
1257{
1258 IR_ASSERT(!IR_IS_CONST_REF(op1));
1259
1260 ir_use_list *use_list = &ctx->use_lists[op1];
1261 ir_ref *p, n = use_list->count;
1262
1263 for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1264 ir_ref use = *p;
1265 ir_insn *use_insn = &ctx->ir_base[use];
1266
1267 if (use_insn->optx == optx) {
1268 IR_ASSERT(use_insn->op1 == op1);
1269 return use;
1270 }
1271 }
1272 return IR_UNUSED;
1273}
1274
1275static ir_ref ir_iter_find_cse(ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
1276{
1277 uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
1278 ir_use_list *use_list = NULL;
1279 ir_ref *p, use;
1280 ir_insn *use_insn;
1281
1282 if (n == 2) {
1283 if (!IR_IS_CONST_REF(op1)) {
1284 use_list = &ctx->use_lists[op1];
1285 }
1286 if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
1287 use_list = &ctx->use_lists[op2];
1288 }
1289 if (use_list) {
1290 n = use_list->count;
1291 for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1292 use = *p;
1293 if (use != ref) {
1294 use_insn = &ctx->ir_base[use];
1295 if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2) {
1296 IR_ASSERT(use_insn->op3 == op3);
1297 if (use < ref) {
1298 return use;
1299 } else {
1300 ir_bitqueue_add(worklist, use);
1301 }
1302 }
1303 }
1304 }
1305 }
1306 } else if (n < 2) {
1307 IR_ASSERT(n == 1);
1308 if (!IR_IS_CONST_REF(op1)) {
1309 use_list = &ctx->use_lists[op1];
1310 n = use_list->count;
1311 for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1312 use = *p;
1313 if (use != ref) {
1314 use_insn = &ctx->ir_base[use];
1315 if (use_insn->opt == opt) {
1316 IR_ASSERT(use_insn->op1 == op1);
1317 IR_ASSERT(use_insn->op2 == op2);
1318 IR_ASSERT(use_insn->op3 == op3);
1319 if (use < ref) {
1320 return use;
1321 } else {
1322 ir_bitqueue_add(worklist, use);
1323 }
1324 }
1325 }
1326 }
1327 }
1328 } else {
1329 IR_ASSERT(n == 3);
1330 if (!IR_IS_CONST_REF(op1)) {
1331 use_list = &ctx->use_lists[op1];
1332 }
1333 if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
1334 use_list = &ctx->use_lists[op2];
1335 }
1336 if (!IR_IS_CONST_REF(op3) && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
1337 use_list = &ctx->use_lists[op3];
1338 }
1339 if (use_list) {
1340 n = use_list->count;
1341 for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1342 use = *p;
1343 if (use != ref) {
1344 use_insn = &ctx->ir_base[use];
1345 if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2 && use_insn->op3 == op3) {
1346 if (use < ref) {
1347 return use;
1348 } else {
1349 ir_bitqueue_add(worklist, use);
1350 }
1351 }
1352 }
1353 }
1354 }
1355 }
1356 return IR_UNUSED;
1357}
1358
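/*
 * Iterative re-folding of a single instruction. Unlike the SCCP-time
 * folding above, this works directly on the IR: a CSE match replaces the
 * node, an in-place re-emit rewrites the operands and re-queues all users,
 * and a constant or copy result replaces the node entirely.
 */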
1359static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1360{
1361 uint32_t opt;
1362 ir_ref op1, op2, op3, copy;
1363 ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
1364
1365 insn = &ctx->ir_base[ref];
1366 opt = insn->opt;
1367 op1 = insn->op1;
1368 op2 = insn->op2;
1369 op3 = insn->op3;
1370
1371restart:
1372 op1_insn = ctx->ir_base + op1;
1373 op2_insn = ctx->ir_base + op2;
1374 op3_insn = ctx->ir_base + op3;
1375
1376 switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
1377 case IR_FOLD_DO_RESTART:
1378 opt = ctx->fold_insn.optx;
1379 op1 = ctx->fold_insn.op1;
1380 op2 = ctx->fold_insn.op2;
1381 op3 = ctx->fold_insn.op3;
1382 goto restart;
1383 case IR_FOLD_DO_CSE:
1384 copy = ir_iter_find_cse(ctx, ref, ctx->fold_insn.opt,
1385 ctx->fold_insn.op1, ctx->fold_insn.op2, ctx->fold_insn.op3, worklist);
1386 if (copy) {
1387 ir_iter_replace_insn(ctx, ref, copy, worklist);
1388 break;
1389 }
1390 IR_FALLTHROUGH;
1391 case IR_FOLD_DO_EMIT:
1392 insn = &ctx->ir_base[ref];
1393 if (insn->opt != ctx->fold_insn.opt
1394 || insn->op1 != ctx->fold_insn.op1
1395 || insn->op2 != ctx->fold_insn.op2
1396 || insn->op3 != ctx->fold_insn.op3) {
1397
1398 ir_use_list *use_list;
1399 ir_ref n, j, *p, use;
1400
1401 insn->optx = ctx->fold_insn.opt;
1403 insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
1404 if (insn->op1 != ctx->fold_insn.op1) {
1405 if (insn->op1 > 0) {
1406 ir_use_list_remove_one(ctx, insn->op1, ref);
1407 }
1408 if (ctx->fold_insn.op1 > 0) {
1409 ir_use_list_add(ctx, ctx->fold_insn.op1, ref);
1410 }
1411 }
1412 if (insn->op2 != ctx->fold_insn.op2) {
1413 if (insn->op2 > 0) {
1414 ir_use_list_remove_one(ctx, insn->op2, ref);
1415 }
1416 if (ctx->fold_insn.op2 > 0) {
1417 ir_use_list_add(ctx, ctx->fold_insn.op2, ref);
1418 }
1419 }
1420 if (insn->op3 != ctx->fold_insn.op3) {
1421 if (insn->op3 > 0) {
1422 ir_use_list_remove_one(ctx, insn->op3, ref);
1423 }
1424 if (ctx->fold_insn.op3 > 0) {
1425 ir_use_list_add(ctx, ctx->fold_insn.op3, ref);
1426 }
1427 }
1428 insn->op1 = ctx->fold_insn.op1;
1429 insn->op2 = ctx->fold_insn.op2;
1430 insn->op3 = ctx->fold_insn.op3;
1431
1432 use_list = &ctx->use_lists[ref];
1433 n = use_list->count;
1434 for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
1435 use = *p;
1436 ir_bitqueue_add(worklist, use);
1437 }
1438 }
1439 break;
1440 case IR_FOLD_DO_COPY:
1441 op1 = ctx->fold_insn.op1;
1442 ir_iter_replace_insn(ctx, ref, op1, worklist);
1443 break;
1444 case IR_FOLD_DO_CONST:
1445 op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type);
1446 ir_iter_replace_insn(ctx, ref, op1, worklist);
1447 break;
1448 default:
1449 IR_ASSERT(0);
1450 break;
1451 }
1452}
1453
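/*
 * Demotion/promotion of floating-point expression trees. If every node of
 * a single-use expression tree feeding an FP2FP conversion can be computed
 * in the target precision without changing the result, the whole tree is
 * retyped and the conversion disappears. The ir_may_promote_*() helpers
 * check the precondition; the ir_promote_*() helpers perform the rewrite.
 */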
1454static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
1455{
1456 ir_insn *insn = &ctx->ir_base[ref];
1457
1458 IR_ASSERT(insn->type == IR_DOUBLE);
1459 if (IR_IS_CONST_REF(ref)) {
1460 return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d;
1461 } else {
1462 switch (insn->op) {
1463 case IR_FP2FP:
1464 return 1;
1465// case IR_INT2FP:
1466// return ctx->use_lists[ref].count == 1;
1467 case IR_NEG:
1468 case IR_ABS:
1469 return ctx->use_lists[ref].count == 1 &&
1470 ir_may_promote_d2f(ctx, insn->op1);
1471 case IR_ADD:
1472 case IR_SUB:
1473 case IR_MUL:
1474 case IR_DIV:
1475 case IR_MIN:
1476 case IR_MAX:
1477 return ctx->use_lists[ref].count == 1 &&
1478 ir_may_promote_d2f(ctx, insn->op1) &&
1479 ir_may_promote_d2f(ctx, insn->op2);
1480 default:
1481 break;
1482 }
1483 }
1484 return 0;
1485}
1486
1487static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
1488{
1489 ir_insn *insn = &ctx->ir_base[ref];
1490
1491 IR_ASSERT(insn->type == IR_FLOAT);
1492 if (IR_IS_CONST_REF(ref)) {
1493 return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f;
1494 } else {
1495 switch (insn->op) {
1496 case IR_FP2FP:
1497 return 1;
1498 case IR_INT2FP:
1499 return ctx->use_lists[ref].count == 1;
1500 case IR_NEG:
1501 case IR_ABS:
1502 return ctx->use_lists[ref].count == 1 &&
1503 ir_may_promote_f2d(ctx, insn->op1);
1504 case IR_ADD:
1505 case IR_SUB:
1506 case IR_MUL:
1507// case IR_DIV:
1508 case IR_MIN:
1509 case IR_MAX:
1510 return ctx->use_lists[ref].count == 1 &&
1511 ir_may_promote_f2d(ctx, insn->op1) &&
1512 ir_may_promote_f2d(ctx, insn->op2);
1513 default:
1514 break;
1515 }
1516 }
1517 return 0;
1518}
1519
1520static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1521{
1522 ir_insn *insn = &ctx->ir_base[ref];
1523 uint32_t count;
1524
1525 IR_ASSERT(insn->type == IR_DOUBLE);
1526 if (IR_IS_CONST_REF(ref)) {
1527 return ir_const_float(ctx, (float)insn->val.d);
1528 } else {
1529 ir_bitqueue_add(worklist, ref);
1530 switch (insn->op) {
1531 case IR_FP2FP:
1532 count = ctx->use_lists[ref].count;
1533 ir_use_list_remove_all(ctx, ref, use);
1534 if (ctx->use_lists[ref].count == 0) {
1535 ir_use_list_replace_one(ctx, insn->op1, ref, use);
1536 if (count > 1) {
1537 do {
1538 ir_use_list_add(ctx, insn->op1, use);
1539 } while (--count > 1);
1540 }
1541 ref = insn->op1;
1542 MAKE_NOP(insn);
1543 return ref;
1544 } else {
1545 ir_use_list_add(ctx, insn->op1, use);
1546 count -= ctx->use_lists[ref].count;
1547 if (count > 1) {
1548 do {
1549 ir_use_list_add(ctx, insn->op1, use);
1550 } while (--count > 1);
1551 }
1552 }
1553 return insn->op1;
1554// case IR_INT2FP:
1555// insn->type = IR_FLOAT;
1556// return ref;
1557 case IR_NEG:
1558 case IR_ABS:
1559 insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1560 insn->type = IR_FLOAT;
1561 return ref;
1562 case IR_ADD:
1563 case IR_SUB:
1564 case IR_MUL:
1565 case IR_DIV:
1566 case IR_MIN:
1567 case IR_MAX:
1568 if (insn->op1 == insn->op2) {
1569 insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1570 } else {
1571 insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1572 insn->op2 = ir_promote_d2f(ctx, insn->op2, ref, worklist);
1573 }
1574 insn->type = IR_FLOAT;
1575 return ref;
1576 default:
1577 break;
1578 }
1579 }
1580 IR_ASSERT(0);
1581 return ref;
1582}
1583
1584static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1585{
1586 ir_insn *insn = &ctx->ir_base[ref];
1587 uint32_t count;
1588 ir_ref old_ref;
1589
1590 IR_ASSERT(insn->type == IR_FLOAT);
1591 if (IR_IS_CONST_REF(ref)) {
1592 return ir_const_double(ctx, (double)insn->val.f);
1593 } else {
1594 ir_bitqueue_add(worklist, ref);
1595 switch (insn->op) {
1596 case IR_FP2FP:
1597 count = ctx->use_lists[ref].count;
1598 ir_use_list_remove_all(ctx, ref, use);
1599 if (ctx->use_lists[ref].count == 0) {
1600 ir_use_list_replace_one(ctx, insn->op1, ref, use);
1601 if (count > 1) {
1602 do {
1603 ir_use_list_add(ctx, insn->op1, use);
1604 } while (--count > 1);
1605 }
1606 ref = insn->op1;
1607 MAKE_NOP(insn);
1608 return ref;
1609 } else {
1610 ir_use_list_add(ctx, insn->op1, use);
1611 count -= ctx->use_lists[ref].count;
1612 if (count > 1) {
1613 do {
1614 ir_use_list_add(ctx, insn->op1, use);
1615 } while (--count > 1);
1616 }
1617 }
1618 return insn->op1;
1619 case IR_INT2FP:
1620 old_ref = ir_iter_find_cse1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1);
1621 if (old_ref) {
1622 IR_ASSERT(ctx->use_lists[ref].count == 1);
1623 ir_use_list_remove_one(ctx, insn->op1, ref);
1624 CLEAR_USES(ref);
1625 MAKE_NOP(insn);
1626 ir_use_list_add(ctx, old_ref, use);
1627 return old_ref;
1628 }
1629 insn->type = IR_DOUBLE;
1630 return ref;
1631 case IR_NEG:
1632 case IR_ABS:
1633 insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1634 insn->type = IR_DOUBLE;
1635 return ref;
1636 case IR_ADD:
1637 case IR_SUB:
1638 case IR_MUL:
1639// case IR_DIV:
1640 case IR_MIN:
1641 case IR_MAX:
1642 if (insn->op1 == insn->op2) {
1643 insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1644 } else {
1645 insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1646 insn->op2 = ir_promote_f2d(ctx, insn->op2, ref, worklist);
1647 }
1648 insn->type = IR_DOUBLE;
1649 return ref;
1650 default:
1651 break;
1652 }
1653 }
1654 IR_ASSERT(0);
1655 return ref;
1656}
1657
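/*
 * Integer analogue of the FP promotion above: a single-use integer
 * expression tree feeding a TRUNC (PHIs and CONDs included) may be
 * computed directly in the narrower type, turning the TRUNC into a no-op.
 */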
1658static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref)
1659{
1660 ir_insn *insn = &ctx->ir_base[ref];
1661 ir_ref *p, n, input;
1662
1663 if (IR_IS_CONST_REF(ref)) {
1664 return !IR_IS_SYM_CONST(insn->op);
1665 } else {
1666 switch (insn->op) {
1667 case IR_ZEXT:
1668 case IR_SEXT:
1669 case IR_TRUNC:
1670 return ctx->ir_base[insn->op1].type == type || ctx->use_lists[ref].count == 1;
1671 case IR_NEG:
1672 case IR_ABS:
1673 case IR_NOT:
1674 return ctx->use_lists[ref].count == 1 &&
1675 ir_may_promote_trunc(ctx, type, insn->op1);
1676 case IR_ADD:
1677 case IR_SUB:
1678 case IR_MUL:
1679 case IR_MIN:
1680 case IR_MAX:
1681 case IR_OR:
1682 case IR_AND:
1683 case IR_XOR:
1684 case IR_SHL:
1685 return ctx->use_lists[ref].count == 1 &&
1686 ir_may_promote_trunc(ctx, type, insn->op1) &&
1687 ir_may_promote_trunc(ctx, type, insn->op2);
1688// case IR_SHR:
1689// case IR_SAR:
1690// case IR_DIV:
1691// case IR_MOD:
1692// case IR_FP2INT:
1693// TODO: ???
1694 case IR_COND:
1695 return ctx->use_lists[ref].count == 1 &&
1696 ir_may_promote_trunc(ctx, type, insn->op2) &&
1697 ir_may_promote_trunc(ctx, type, insn->op3);
1698 case IR_PHI:
1699 if (ctx->use_lists[ref].count != 1) {
1700 ir_use_list *use_list = &ctx->use_lists[ref];
1701 ir_ref count = 0;
1702
1703 for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
1704 if (*p != ref) {
1705 if (count) {
1706 return 0;
1707 }
1708 count = 1;
1709 }
1710 }
1711 }
1712 for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
1713 input = *p;
1714 if (input != ref) {
1715 if (!ir_may_promote_trunc(ctx, type, input)) {
1716 return 0;
1717 }
1718 }
1719 }
1720 return 1;
1721 default:
1722 break;
1723 }
1724 }
1725 return 0;
1726}
1727
1728static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1729{
1730 ir_insn *insn = &ctx->ir_base[ref];
1731 uint32_t count;
1732 ir_ref *p, n, input;
1733
1734 if (IR_IS_CONST_REF(ref)) {
1735 return ir_const(ctx, insn->val, type);
1736 } else {
1737 ir_bitqueue_add(worklist, ref);
1738 switch (insn->op) {
1739 case IR_ZEXT:
1740 case IR_SEXT:
1741 case IR_TRUNC:
1742 if (ctx->ir_base[insn->op1].type != type) {
1743 ir_type src_type = ctx->ir_base[insn->op1].type;
1744 if (ir_type_size[src_type] == ir_type_size[type]) {
1745 insn->op = IR_BITCAST;
1746 } else if (ir_type_size[src_type] > ir_type_size[type]) {
1747 insn->op = IR_TRUNC;
1748 } else {
1749 if (insn->op != IR_SEXT && insn->op != IR_ZEXT) {
1750 insn->op = IR_IS_TYPE_SIGNED(type) ? IR_SEXT : IR_ZEXT;
1751 }
1752 }
1753 insn->type = type;
1754 return ref;
1755 }
1756
1757 count = ctx->use_lists[ref].count;
1758 ir_use_list_remove_all(ctx, ref, use);
1759 if (ctx->use_lists[ref].count == 0) {
1760 ir_use_list_replace_one(ctx, insn->op1, ref, use);
1761 if (count > 1) {
1762 do {
1763 ir_use_list_add(ctx, insn->op1, use);
1764 } while (--count > 1);
1765 }
1766 ref = insn->op1;
1767 MAKE_NOP(insn);
1768 return ref;
1769 } else {
1770 ir_use_list_add(ctx, insn->op1, use);
1771 count -= ctx->use_lists[ref].count;
1772 if (count > 1) {
1773 do {
1774 ir_use_list_add(ctx, insn->op1, use);
1775 } while (--count > 1);
1776 }
1777 }
1778 return insn->op1;
1779 case IR_NEG:
1780 case IR_ABS:
1781 case IR_NOT:
1782 insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1783 insn->type = type;
1784 return ref;
1785 case IR_ADD:
1786 case IR_SUB:
1787 case IR_MUL:
1788 case IR_MIN:
1789 case IR_MAX:
1790 case IR_OR:
1791 case IR_AND:
1792 case IR_XOR:
1793 case IR_SHL:
1794 if (insn->op1 == insn->op2) {
1795 insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1796 } else {
1797 insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1798 insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1799 }
1800 insn->type = type;
1801 return ref;
1802// case IR_DIV:
1803// case IR_MOD:
1804// case IR_SHR:
1805// case IR_SAR:
1806// case IR_FP2INT:
1807// TODO: ???
1808 case IR_COND:
1809 if (insn->op2 == insn->op3) {
1810 insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1811 } else {
1812 insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1813 insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist);
1814 }
1815 insn->type = type;
1816 return ref;
1817 case IR_PHI:
1818 for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
1819 input = *p;
1820 if (input != ref) {
1821 *p = ir_promote_i2i(ctx, type, input, ref, worklist);
1822 }
1823 }
1824 insn->type = type;
1825 return ref;
1826 default:
1827 break;
1828 }
1829 }
1830 IR_ASSERT(0);
1831 return ref;
1832}
1833
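/*
 * Helpers for induction-variable widening below: ir_ext_const() extends a
 * constant at compile time, while ir_ext_ref() emits (or reuses via CSE) a
 * SEXT/ZEXT node and rewires the use lists accordingly.
 */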
1834static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
1835{
1836 ir_val new_val;
1837
1838 switch (val_insn->type) {
1839 default:
1840 IR_ASSERT(0);
1841 case IR_I8:
1842 case IR_U8:
1843 case IR_BOOL:
1844 if (op == IR_SEXT) {
1845 new_val.i64 = (int64_t)val_insn->val.i8;
1846 } else {
1847 new_val.u64 = (uint64_t)val_insn->val.u8;
1848 }
1849 break;
1850 case IR_I16:
1851 case IR_U16:
1852 if (op == IR_SEXT) {
1853 new_val.i64 = (int64_t)val_insn->val.i16;
1854 } else {
1855 new_val.u64 = (uint64_t)val_insn->val.u16;
1856 }
1857 break;
1858 case IR_I32:
1859 case IR_U32:
1860 if (op == IR_SEXT) {
1861 new_val.i64 = (int64_t)val_insn->val.i32;
1862 } else {
1863 new_val.u64 = (uint64_t)val_insn->val.u32;
1864 }
1865 break;
1866 }
1867 return ir_const(ctx, new_val, type);
1868}
1869
1870static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist)
1871{
1872 uint32_t optx = IR_OPTX(op, type, 1);
1873 ir_ref ref;
1874
1875 if (!IR_IS_CONST_REF(src_ref)) {
1876 ref = ir_iter_find_cse1(ctx, optx, src_ref);
1877 if (ref) {
1878 ir_use_list_add(ctx, ref, var_ref);
1879 if (!IR_IS_CONST_REF(src_ref)) {
1880 ir_use_list_remove_one(ctx, src_ref, var_ref);
1881 }
1882 ir_bitqueue_add(worklist, ref);
1883 return ref;
1884 }
1885 }
1886
1887 ref = ir_emit1(ctx, optx, src_ref);
1888 ir_use_list_add(ctx, ref, var_ref);
1889 if (!IR_IS_CONST_REF(src_ref)) {
1890 ir_use_list_replace_one(ctx, src_ref, var_ref, ref);
1891 }
1892 ir_bitqueue_grow(worklist, ref + 1);
1893 ir_bitqueue_add(worklist, ref);
1894 return ref;
1895}
1896
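/*
 * Conservative estimate of the latest control node an expression depends
 * on; used to decide loop invariance by node order (see the TODOs about
 * checking dominance instead).
 */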
1897static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val)
1898{
1899 ir_insn *insn;
1900 ir_ref n, *p, input, result, ctrl;
1901
1902 if (IR_IS_CONST_REF(val)) {
1903 return 1; /* IR_START */
1904 }
1905
1906 insn = &ctx->ir_base[val];
1907 if (ir_op_flags[insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM)) {
1908 return val;
1909 }
1910
1912 if (IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL_DEP) {
1913 return insn->op1;
1914 }
1915
1916 n = insn->inputs_count;
1917 p = insn->ops + 1;
1918
1919 result = 1;
1920 for (; n > 0; p++, n--) {
1921 input = *p;
1922 ctrl = _ir_estimated_control(ctx, input);
1923 if (ctrl > result) { // TODO: check dominance depth instead of order
1924 result = ctrl;
1925 }
1926 }
1927 return result;
1928}
1929
1930static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop)
1931{
1932 ref = _ir_estimated_control(ctx, ref);
1933 return ref < loop; // TODO: check dominance instead of order
1934}
1935
1936static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
1937{
1938 if (IR_IS_CONST_REF(ref)) {
1939 return 1;
1940 } else {
1941 ir_insn *insn = &ctx->ir_base[ref];
1942
1943 if (insn->op == IR_LOAD) {
1944 if (ir_is_loop_invariant(ctx, ref, loop)) {
1945 return 1;
1946 } else {
1947 /* ZEXT(LOAD(_, _)) costs the same as LOAD(_, _) */
1948 if (ctx->use_lists[ref].count == 2) {
1949 return 1;
1950 } else if (ctx->use_lists[ref].count == 3) {
1951 ir_use_list *use_list = &ctx->use_lists[ref];
1952 ir_ref *p, n, use;
1953
1954 for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
1955 use = *p;
1956 if (use != ext_ref) {
1957 ir_insn *use_insn = &ctx->ir_base[use];
1958
1959 if (use_insn->op != op
1960 && (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
1961 || use_insn->op1 != ref)) {
1962 return 0;
1963 }
1964 }
1965 }
1966 return 1;
1967 }
1968 }
1969 return 0;
1970 } else {
1971 return ir_is_loop_invariant(ctx, ref, loop);
1972 }
1973 }
1974}
1975
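/*
 * Induction-variable widening: if a variable defined by a PHI and updated
 * by ADD/SUB/MUL is otherwise used only by comparisons, IFs and a single
 * SEXT/ZEXT, the PHI and its update are retyped to the wider type. Constant
 * operands are widened with ir_ext_const(), other operands get an explicit
 * extension via ir_ext_ref(), and the original extension node is then
 * replaced by the now-wide value itself.
 */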
1976static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
1977{
1978 ir_op op = ctx->ir_base[ext_ref].op;
1979 ir_type type = ctx->ir_base[ext_ref].type;
1980 ir_insn *phi_insn;
1981 ir_use_list *use_list;
1982 ir_ref n, *p, use, ext_ref_2 = IR_UNUSED;
1983
1984 /* Check if we may change the type of the induction variable */
1985 use_list = &ctx->use_lists[phi_ref];
1986 n = use_list->count;
1987 if (n > 1) {
1988 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1989 use = *p;
1990 if (use == op_ref || use == ext_ref) {
1991 continue;
1992 } else {
1993 ir_insn *use_insn = &ctx->ir_base[use];
1994
1995 if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
1996 if (use_insn->op1 == phi_ref) {
1997 if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
1998 continue;
1999 }
2000 } else if (use_insn->op2 == phi_ref) {
2001 if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2002 continue;
2003 }
2004 }
2005 return 0;
2006 } else if (use_insn->op == IR_IF) {
2007 continue;
2008 } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
2009 ext_ref_2 = use;
2010 continue;
2011 } else {
2012 return 0;
2013 }
2014 }
2015 }
2016 }
2017
2018 use_list = &ctx->use_lists[op_ref];
2019 n = use_list->count;
2020 if (n > 1) {
2021 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2022 use = *p;
2023 if (use == phi_ref || use == ext_ref) {
2024 continue;
2025 } else {
2026 ir_insn *use_insn = &ctx->ir_base[use];
2027
2028 if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
2029 if (use_insn->op1 == phi_ref) {
2030 if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2031 continue;
2032 }
2033 } else if (use_insn->op2 == phi_ref) {
2034 if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2035 continue;
2036 }
2037 }
2038 return 0;
2039 } else if (use_insn->op == IR_IF) {
2040 continue;
2041 } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
2042 ext_ref_2 = use;
2043 continue;
2044 } else {
2045 return 0;
2046 }
2047 }
2048 }
2049 }
2050
2051 for (n = 0; n < ctx->use_lists[phi_ref].count; n++) {
2052 /* "use_lists" may be reallocated by ir_ext_ref() */
2053 use = ctx->use_edges[ctx->use_lists[phi_ref].refs + n];
2054 if (use == ext_ref) {
2055 continue;
2056 } else {
2057 ir_insn *use_insn = &ctx->ir_base[use];
2058
2059 if (use_insn->op == IR_IF) {
2060 continue;
2061 } else if (use_insn->op == op) {
2062 IR_ASSERT(ext_ref_2 == use);
2063 continue;
2064 }
2065 IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
2066 || use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
2067 && (use_insn->op1 == phi_ref || use_insn->op2 == phi_ref));
2068 if (use_insn->op1 != phi_ref) {
2069 if (IR_IS_CONST_REF(use_insn->op1)
2070 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
2071 ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
2072 } else {
2073 ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
2074 }
2075 ir_bitqueue_add(worklist, use);
2076 }
2077 if (use_insn->op2 != phi_ref) {
2078 if (IR_IS_CONST_REF(use_insn->op2)
2079 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
2080 ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
2081 } else {
2082 ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
2083 }
2084 ir_bitqueue_add(worklist, use);
2085 }
2086 }
2087 }
2088
2089 if (ctx->use_lists[op_ref].count > 1) {
2090 for (n = 0; n < ctx->use_lists[op_ref].count; n++) {
2091 /* "use_lists" may be reallocated by ir_ext_ref() */
2092 use = ctx->use_edges[ctx->use_lists[op_ref].refs + n];
2093 if (use == ext_ref || use == phi_ref) {
2094 continue;
2095 } else {
2096 ir_insn *use_insn = &ctx->ir_base[use];
2097
2098 if (use_insn->op == IR_IF) {
2099 continue;
2100 } else if (use_insn->op == op) {
2101 IR_ASSERT(ext_ref_2 == use);
2102 continue;
2103 }
2104 IR_ASSERT(use_insn->op >= IR_EQ && use_insn->op <= IR_UGT);
2105 if (use_insn->op1 != op_ref) {
2106 if (IR_IS_CONST_REF(use_insn->op1)
2107 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
2108 ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
2109 } else {
2110 ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
2111 }
2112 ir_bitqueue_add(worklist, use);
2113 }
2114 if (use_insn->op2 != op_ref) {
2115 if (IR_IS_CONST_REF(use_insn->op2)
2116 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
2117 ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
2118 } else {
2119 ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
2120 }
2121 ir_bitqueue_add(worklist, use);
2122 }
2123 }
2124 }
2125 }
2126
2127 ir_iter_replace_insn(ctx, ext_ref, ctx->ir_base[ext_ref].op1, worklist);
2128
2129 if (ext_ref_2) {
2130 ir_iter_replace_insn(ctx, ext_ref_2, ctx->ir_base[ext_ref_2].op1, worklist);
2131 }
2132
2133 ctx->ir_base[op_ref].type = type;
2134
2135 phi_insn = &ctx->ir_base[phi_ref];
2136 phi_insn->type = type;
2137 if (IR_IS_CONST_REF(phi_insn->op2)
2138 && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
2139 ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
2140 } else {
2141 ctx->ir_base[phi_ref].op2 = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist);
2142 }
2143
2144 return 1;
2145}
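/*
 * Illustrative sketch (not from ir_sccp.c) of the promotion at C level: the
 * ZEXT/SEXT applied to the induction variable on every iteration disappears
 * because the variable itself is widened; only loop-invariant operands and
 * constants are extended, and only once.
 */
#include <stdint.h>

uint64_t sum_before(const uint8_t *a, uint32_t n)
{
    uint64_t s = 0;
    for (uint32_t i = 0; i < n; i++) {
        s += a[(uint64_t)i];   /* ZEXT(i) recomputed on every iteration */
    }
    return s;
}

uint64_t sum_after(const uint8_t *a, uint32_t n)
{
    uint64_t s = 0;
    for (uint64_t i = 0; i < (uint64_t)n; i++) {  /* i promoted to 64 bits */
        s += a[i];                                /* the ZEXT node is gone */
    }
    return s;
}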
2146
2147static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
 2148{
2149 ir_ref ref = insn->op1;
2150
2151 /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
2152 insn = &ctx->ir_base[ref];
2153 if (insn->op == IR_PHI
2154 && insn->inputs_count == 3 /* (2 values) */
2155 && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) {
2156 ir_ref op_ref = insn->op3;
2157 ir_insn *op_insn = &ctx->ir_base[op_ref];
2158
2159 if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
2160 if (op_insn->op1 == ref) {
2161 if (ir_is_loop_invariant(ctx, op_insn->op2, insn->op1)) {
2162 return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
2163 }
2164 } else if (op_insn->op2 == ref) {
2165 if (ir_is_loop_invariant(ctx, op_insn->op1, insn->op1)) {
2166 return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
2167 }
2168 }
2169 }
2170 } else if (insn->op == IR_ADD || insn->op == IR_SUB || insn->op == IR_MUL) {
2171 if (!IR_IS_CONST_REF(insn->op1)
2172 && ctx->ir_base[insn->op1].op == IR_PHI
2173 && ctx->ir_base[insn->op1].inputs_count == 3 /* (2 values) */
2174 && ctx->ir_base[insn->op1].op3 == ref
2175 && ctx->ir_base[ctx->ir_base[insn->op1].op1].op == IR_LOOP_BEGIN
2176 && ir_is_loop_invariant(ctx, insn->op2, ctx->ir_base[insn->op1].op1)) {
2177 return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op1, ref, worklist);
2178 } else if (!IR_IS_CONST_REF(insn->op2)
2179 && ctx->ir_base[insn->op2].op == IR_PHI
2180 && ctx->ir_base[insn->op2].inputs_count == 3 /* (2 values) */
2181 && ctx->ir_base[insn->op2].op3 == ref
2182 && ctx->ir_base[ctx->ir_base[insn->op2].op1].op == IR_LOOP_BEGIN
2183 && ir_is_loop_invariant(ctx, insn->op1, ctx->ir_base[insn->op2].op1)) {
2184 return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op2, ref, worklist);
2185 }
2186 }
2187
2188 return 0;
2189}
2190
2191static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref)
2192{
2193 ir_use_list *use_list = &ctx->use_lists[if_ref];
2194 ir_ref *p = &ctx->use_edges[use_list->refs];
2195
2196 IR_ASSERT(use_list->count == 2);
2197 if (ctx->ir_base[*p].op == IR_IF_TRUE) {
2198 IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_FALSE);
2199 *if_true_ref = *p;
2200 *if_false_ref = *(p + 1);
2201 } else {
2202 IR_ASSERT(ctx->ir_base[*p].op == IR_IF_FALSE);
2203 IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_TRUE);
2204 *if_false_ref = *p;
2205 *if_true_ref = *(p + 1);
2206 }
2207}
2208
2209static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist)
2210{
2211 ir_ref prev, next;
2212 ir_use_list *use_list;
2213
2214 if (ctx->use_lists[begin].count > 1) {
2215 ir_ref *p, n, i, use;
2216 ir_insn *use_insn;
2217 ir_ref region = end;
 2218   next = IR_UNUSED;
2219
2220 while (!IR_IS_BB_START(ctx->ir_base[region].op)) {
2221 region = ctx->ir_base[region].op1;
2222 }
2223
2224 use_list = &ctx->use_lists[begin];
2225 n = use_list->count;
2226 for (p = &ctx->use_edges[use_list->refs], i = 0; i < n; p++, i++) {
2227 use = *p;
2228 use_insn = &ctx->ir_base[use];
2229 if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) {
2230 IR_ASSERT(!next);
2231 next = use;
2232 } else {
2233 IR_ASSERT(use_insn->op == IR_VAR);
2234 IR_ASSERT(use_insn->op1 == begin);
2235 use_insn->op1 = region;
2236 if (ir_use_list_add(ctx, region, use)) {
2237 /* restore after reallocation */
2238 use_list = &ctx->use_lists[begin];
2239 n = use_list->count;
2240 p = &ctx->use_edges[use_list->refs + i];
2241 }
2242 }
2243 }
2244
2245 IR_ASSERT(next);
2246 ctx->use_edges[use_list->refs] = next;
2247 use_list->count = 1;
2248 }
2249
2250 IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
2251 IR_ASSERT(ctx->ir_base[end].op == IR_END);
2252 IR_ASSERT(ctx->ir_base[begin].op1 == end);
2253 IR_ASSERT(ctx->use_lists[end].count == 1);
2254
2255 prev = ctx->ir_base[end].op1;
2256
2257 use_list = &ctx->use_lists[begin];
2258 IR_ASSERT(use_list->count == 1);
2259 next = ctx->use_edges[use_list->refs];
2260
2261 /* remove BEGIN and END */
 2262  MAKE_NOP(&ctx->ir_base[begin]); CLEAR_USES(begin);
2263 MAKE_NOP(&ctx->ir_base[end]); CLEAR_USES(end);
2264
2265 /* connect their predecessor and successor */
2266 ctx->ir_base[next].op1 = prev;
 2267  ir_use_list_replace_one(ctx, prev, end, next);
2268
2269 if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) {
2270 ir_bitqueue_add(worklist, prev);
2271 }
2272}
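/*
 * Minimal C-level picture (an illustration, not from the source): an END
 * that feeds a BEGIN with no other predecessor is pure fall-through, so the
 * two basic blocks fuse; VAR nodes still attached to the dying BEGIN are
 * re-parented to the enclosing region first.
 */
int fallthrough(int x)
{
    x += 1;     /* first block ends here (END)         */
    x += 2;     /* second block (BEGIN) merges into it */
    return x;
}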
2273
2274static void ir_remove_unused_vars(ir_ctx *ctx, ir_ref start, ir_ref end)
2275{
2276 ir_use_list *use_list = &ctx->use_lists[start];
2277 ir_ref *p, use, n = use_list->count;
2278
2279 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2280 use = *p;
2281 if (use != end) {
2282 ir_insn *use_insn = &ctx->ir_base[use];
2283 IR_ASSERT(use_insn->op == IR_VAR);
2284 IR_ASSERT(ctx->use_lists[use].count == 0);
2285 MAKE_NOP(use_insn);
2286 }
2287 }
2288}
2289
2290static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2291{
2292 if (insn->inputs_count == 2) {
2293 ir_ref end1_ref = insn->op1, end2_ref = insn->op2;
2294 ir_insn *end1 = &ctx->ir_base[end1_ref];
2295 ir_insn *end2 = &ctx->ir_base[end2_ref];
2296
2297 if (end1->op != IR_END || end2->op != IR_END) {
2298 return 0;
2299 }
2300
2301 ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
2302 ir_insn *start1 = &ctx->ir_base[start1_ref];
2303 ir_insn *start2 = &ctx->ir_base[start2_ref];
2304
2305 if (start1->op1 != start2->op1) {
2306 return 0;
2307 }
2308
2309 ir_ref root_ref = start1->op1;
2310 ir_insn *root = &ctx->ir_base[root_ref];
2311
2312 if (root->op != IR_IF
2313 && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) {
2314 return 0;
2315 }
2316
2317 /* Empty Diamond
2318 *
2319 * prev prev
2320 * | condition | condition
2321 * | / |
2322 * IF |
2323 * | \ |
2324 * | +-----+ |
2325 * | IF_FALSE |
2326 * IF_TRUE | => |
2327 * | END |
2328 * END / |
2329 * | +---+ |
2330 * | / |
2331 * MERGE |
2332 * | |
2333 * next next
2334 */
2335
2336 ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
2337 ir_insn *next = &ctx->ir_base[next_ref];
2338
2339 if (ctx->use_lists[start1_ref].count != 1) {
2340 ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2341 }
2342 if (ctx->use_lists[start2_ref].count != 1) {
2343 ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2344 }
2345
2346 next->op1 = root->op1;
2347 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2348 if (!IR_IS_CONST_REF(root->op2)) {
2349 ir_use_list_remove_all(ctx, root->op2, root_ref);
2350 if (ir_is_dead(ctx, root->op2)) {
2351 ir_bitqueue_add(worklist, root->op2);
2352 }
2353 }
2354
2355 MAKE_NOP(root); CLEAR_USES(root_ref);
2356 MAKE_NOP(start1); CLEAR_USES(start1_ref);
2357 MAKE_NOP(start2); CLEAR_USES(start2_ref);
2358 MAKE_NOP(end1); CLEAR_USES(end1_ref);
2359 MAKE_NOP(end2); CLEAR_USES(end2_ref);
2360 MAKE_NOP(insn); CLEAR_USES(ref);
2361
2362 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2363 ir_bitqueue_add(worklist, next->op1);
2364 }
2365
2366 return 1;
2367 } else {
2368 ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1;
2369 ir_ref root_ref = IR_UNUSED;
2370
2371 for (i = 0; i < count; i++) {
2372 ir_ref end_ref, start_ref;
2373 ir_insn *end, *start;
2374
2375 end_ref = ops[i];
2376 end = &ctx->ir_base[end_ref];
2377 if (end->op != IR_END) {
2378 return 0;
2379 }
2380 start_ref = end->op1;
2381 start = &ctx->ir_base[start_ref];
2382 if (start->op != IR_CASE_VAL && start->op != IR_CASE_DEFAULT) {
2383 return 0;
2384 }
2385 if (ctx->use_lists[start_ref].count != 1) {
2386 ir_remove_unused_vars(ctx, start_ref, end_ref);
2387 }
2388 if (!root_ref) {
2389 root_ref = start->op1;
2390 if (ctx->use_lists[root_ref].count != count) {
2391 return 0;
2392 }
2393 } else if (start->op1 != root_ref) {
2394 return 0;
2395 }
2396 }
2397
2398 /* Empty N-Diamond */
2399 ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
2400 ir_insn *next = &ctx->ir_base[next_ref];
2401 ir_insn *root = &ctx->ir_base[root_ref];
2402
2403 next->op1 = root->op1;
2404 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2405
2406 if (!IR_IS_CONST_REF(root->op2)) {
2407 ir_use_list_remove_all(ctx, root->op2, root_ref);
2408 if (ir_is_dead(ctx, root->op2)) {
2409 ir_bitqueue_add(worklist, root->op2);
2410 }
2411 }
2412
2413 MAKE_NOP(root); CLEAR_USES(root_ref);
2414
2415 for (i = 0; i < count; i++) {
2416 ir_ref end_ref = ops[i];
2417 ir_insn *end = &ctx->ir_base[end_ref];
2418 ir_ref start_ref = end->op1;
2419 ir_insn *start = &ctx->ir_base[start_ref];
2420
2421 MAKE_NOP(start); CLEAR_USES(start_ref);
2422 MAKE_NOP(end); CLEAR_USES(end_ref);
2423 }
2424
2425 MAKE_NOP(insn); CLEAR_USES(ref);
2426
2427 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2428 ir_bitqueue_add(worklist, next->op1);
2429 }
2430
2431 return 1;
2432 }
2433}
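/*
 * Illustrative source shape (not from ir_sccp.c) that leaves an empty
 * diamond once both arms have been optimized away: the IF / IF_TRUE /
 * IF_FALSE / MERGE quartet is deleted and control runs straight through.
 */
int empty_diamond(int x, int cond)
{
    if (cond) {
        /* arm emptied by earlier optimizations */
    } else {
        /* arm emptied by earlier optimizations */
    }
    return x + 1;   /* becomes straight-line code; cond may turn dead */
}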
2434
2435static bool ir_is_zero(ir_ctx *ctx, ir_ref ref)
2436{
2437 return IR_IS_CONST_REF(ref)
2438 && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
2439 && ctx->ir_base[ref].val.u32 == 0;
2440}
2441
2442static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2443{
2444 IR_ASSERT(insn->inputs_count == 3);
2445 IR_ASSERT(ctx->use_lists[merge_ref].count == 2);
2446
2447 ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
2448 ir_insn *end1 = &ctx->ir_base[end1_ref];
2449 ir_insn *end2 = &ctx->ir_base[end2_ref];
2450
2451 if (end1->op == IR_END && end2->op == IR_END) {
2452 ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
2453 ir_insn *start1 = &ctx->ir_base[start1_ref];
2454 ir_insn *start2 = &ctx->ir_base[start2_ref];
2455
2456 if (start1->op1 == start2->op1) {
2457 ir_ref root_ref = start1->op1;
2458 ir_insn *root = &ctx->ir_base[root_ref];
2459
2460 if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2) && ctx->use_lists[root->op2].count == 1) {
2461 ir_ref cond_ref = root->op2;
2462 ir_insn *cond = &ctx->ir_base[cond_ref];
2463 ir_type type = insn->type;
2464 bool is_cmp, is_less;
2465
2466 if (IR_IS_TYPE_FP(type)) {
2467 is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE ||
2468 cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
2469 is_less = (cond->op == IR_LT || cond->op == IR_LE ||
2470 cond->op == IR_ULT || cond->op == IR_ULE);
2471 } else if (IR_IS_TYPE_SIGNED(type)) {
2472 is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE);
2473 is_less = (cond->op == IR_LT || cond->op == IR_LE);
2474 } else {
 2475  IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
2476 is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
2477 is_less = (cond->op == IR_ULT || cond->op == IR_ULE);
2478 }
2479
2480 if (is_cmp
2481 && ((insn->op2 == cond->op1 && insn->op3 == cond->op2)
2482 || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) {
2483 /* MAX/MIN
2484 *
2485 * prev prev
2486 * | LT(A, B) |
2487 * | / |
2488 * IF |
2489 * | \ |
2490 * | +-----+ |
2491 * | IF_FALSE |
2492 * IF_TRUE | => |
2493 * | END |
2494 * END / |
2495 * | +---+ |
2496 * | / |
2497 * MERGE |
2498 * | \ |
2499 * | PHI(A, B) | MIN(A, B)
2500 * next next
2501 */
2502 ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2503 ir_insn *next;
2504
2505 if (next_ref == ref) {
2506 next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2507 }
2508 next = &ctx->ir_base[next_ref];
2509
2510 if (ctx->use_lists[start1_ref].count != 1) {
2511 ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2512 }
2513 if (ctx->use_lists[start2_ref].count != 1) {
2514 ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2515 }
2516
2517 insn->op = (
2518 (is_less ? cond->op1 : cond->op2)
2519 ==
2520 ((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3)
2521 ) ? IR_MIN : IR_MAX;
2522 insn->inputs_count = 2;
2523 if (insn->op2 > insn->op3) {
2524 insn->op1 = insn->op2;
2525 insn->op2 = insn->op3;
2526 } else {
2527 insn->op1 = insn->op3;
2528 }
2529 insn->op3 = IR_UNUSED;
2530
2531 next->op1 = root->op1;
2532 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2533 if (!IR_IS_CONST_REF(insn->op1)) {
2534 ir_use_list_remove_all(ctx, insn->op1, cond_ref);
2535 }
2536 if (!IR_IS_CONST_REF(insn->op2)) {
2537 ir_use_list_remove_all(ctx, insn->op2, cond_ref);
2538 }
2539
2540 MAKE_NOP(cond); CLEAR_USES(cond_ref);
2541 MAKE_NOP(root); CLEAR_USES(root_ref);
2542 MAKE_NOP(start1); CLEAR_USES(start1_ref);
2543 MAKE_NOP(start2); CLEAR_USES(start2_ref);
2544 MAKE_NOP(end1); CLEAR_USES(end1_ref);
2545 MAKE_NOP(end2); CLEAR_USES(end2_ref);
2546 MAKE_NOP(merge); CLEAR_USES(merge_ref);
2547
2548 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2549 ir_bitqueue_add(worklist, next->op1);
2550 }
2551
2552 return 1;
2553 } else if (is_cmp
2554 && ((ctx->ir_base[insn->op2].op == IR_NEG
2555 && ctx->use_lists[insn->op2].count == 1
2556 && ctx->ir_base[insn->op2].op1 == insn->op3
2557 && ((cond->op1 == insn->op3
2558 && ir_is_zero(ctx, cond->op2)
2559 && is_less == (start1->op == IR_IF_TRUE))
2560 || (cond->op2 == insn->op3
2561 && ir_is_zero(ctx, cond->op1)
2562 && is_less != (start1->op == IR_IF_TRUE))))
2563 || (ctx->ir_base[insn->op3].op == IR_NEG
2564 && ctx->use_lists[insn->op3].count == 1
2565 && ctx->ir_base[insn->op3].op1 == insn->op2
2566 && ((cond->op1 == insn->op2
2567 && ir_is_zero(ctx, cond->op2)
2568 && is_less != (start1->op == IR_IF_TRUE))
2569 || (cond->op2 == insn->op2
2570 && ir_is_zero(ctx, cond->op1)
2571 && is_less == (start1->op == IR_IF_TRUE)))))) {
2572 /* ABS
2573 *
2574 * prev prev
2575 * | LT(A, 0) |
2576 * | / |
2577 * IF |
2578 * | \ |
2579 * | +-----+ |
2580 * | IF_FALSE |
2581 * IF_TRUE | => |
2582 * | END |
2583 * END / |
2584 * | +---+ |
2585 * | / |
2586 * MERGE |
2587 * | \ |
2588 * | PHI(A, NEG(A)) | ABS(A)
2589 * next next
2590 */
2591 ir_ref neg_ref;
2592 ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2593 ir_insn *next;
2594
2595 if (next_ref == ref) {
2596 next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2597 }
2598 next = &ctx->ir_base[next_ref];
2599
2600 if (ctx->use_lists[start1_ref].count != 1) {
2601 ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2602 }
2603 if (ctx->use_lists[start2_ref].count != 1) {
2604 ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2605 }
2606
2607 insn->op = IR_ABS;
2608 insn->inputs_count = 1;
2609 if (ctx->ir_base[insn->op2].op == IR_NEG) {
2610 neg_ref = insn->op2;
2611 insn->op1 = insn->op3;
2612 } else {
2613 neg_ref = insn->op3;
2614 insn->op1 = insn->op2;
2615 }
2616 insn->op2 = IR_UNUSED;
2617 insn->op3 = IR_UNUSED;
2618
2619 next->op1 = root->op1;
2620 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2621 ir_use_list_remove_one(ctx, insn->op1, neg_ref);
2622 if (!IR_IS_CONST_REF(insn->op1)) {
2623 ir_use_list_remove_all(ctx, insn->op1, cond_ref);
2624 }
2625
2626 MAKE_NOP(cond); CLEAR_USES(cond_ref);
2627 MAKE_NOP(root); CLEAR_USES(root_ref);
2628 MAKE_NOP(start1); CLEAR_USES(start1_ref);
2629 MAKE_NOP(start2); CLEAR_USES(start2_ref);
2630 MAKE_NOP(end1); CLEAR_USES(end1_ref);
2631 MAKE_NOP(end2); CLEAR_USES(end2_ref);
2632 MAKE_NOP(merge); CLEAR_USES(merge_ref);
2633 MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref);
2634
2635 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2636 ir_bitqueue_add(worklist, next->op1);
2637 }
2638
2639 return 1;
2640#if 0
2641 } else {
2642 /* COND
2643 *
2644 * prev prev
2645 * | cond |
2646 * | / |
2647 * IF |
2648 * | \ |
2649 * | +-----+ |
2650 * | IF_FALSE |
2651 * IF_TRUE | => |
2652 * | END |
2653 * END / |
2654 * | +---+ |
2655 * | / |
2656 * MERGE |
2657 * | \ |
2658 * | PHI(A, B) | COND(cond, A, B)
2659 * next next
2660 */
2661 ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2662 ir_insn *next;
2663
2664 if (next_ref == ref) {
2665 next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2666 }
2667 next = &ctx->ir_base[next_ref];
2668
2669 if (ctx->use_lists[start1_ref].count != 1) {
2670 ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2671 }
2672 if (ctx->use_lists[start2_ref].count != 1) {
2673 ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2674 }
2675
2676 insn->op = IR_COND;
2677 insn->inputs_count = 3;
2678 insn->op1 = cond_ref;
2679 if (start1->op == IR_IF_FALSE) {
2680 SWAP_REFS(insn->op2, insn->op3);
2681 }
2682
2683 next->op1 = root->op1;
2684 ir_use_list_replace_one(ctx, cond_ref, root_ref, ref);
2685 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2686 ir_use_list_remove_all(ctx, root->op2, root_ref);
2687
2688 MAKE_NOP(root); CLEAR_USES(root_ref);
2689 MAKE_NOP(start1); CLEAR_USES(start1_ref);
2690 MAKE_NOP(start2); CLEAR_USES(start2_ref);
2691 MAKE_NOP(end1); CLEAR_USES(end1_ref);
2692 MAKE_NOP(end2); CLEAR_USES(end2_ref);
2693 MAKE_NOP(merge); CLEAR_USES(merge_ref);
2694
2695 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2696 ir_bitqueue_add(worklist, next->op1);
2697 }
2698
2699 return 1;
2700#endif
2701 }
2702 }
2703 }
2704 }
2705
2706 return 0;
2707}
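/*
 * Illustrative C-level equivalents (not from ir_sccp.c) of the two diamonds
 * recognized above: a compare-and-select becomes IR_MIN/IR_MAX, and a
 * compare-against-zero selecting between A and NEG(A) becomes IR_ABS.
 */
int imin(int a, int b)
{
    return (a < b) ? a : b;   /* IF(LT(a,b)) ... PHI(a,b)      -> MIN(a,b) */
}

int iabs(int a)
{
    return (a < 0) ? -a : a;  /* IF(LT(a,0)) ... PHI(NEG(a),a) -> ABS(a)  */
}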
2708
2709static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2)
2710{
2711 IR_ASSERT(op1->type == op2->type);
2712 if (IR_IS_TYPE_INT(op1->type)) {
2713 if (op == IR_EQ) {
2714 return op1->val.u64 == op2->val.u64;
2715 } else if (op == IR_NE) {
2716 return op1->val.u64 != op2->val.u64;
2717 } else if (op == IR_LT) {
2718 if (IR_IS_TYPE_SIGNED(op1->type)) {
2719 return op1->val.i64 < op2->val.i64;
2720 } else {
2721 return op1->val.u64 < op2->val.u64;
2722 }
2723 } else if (op == IR_GE) {
2724 if (IR_IS_TYPE_SIGNED(op1->type)) {
2725 return op1->val.i64 >= op2->val.i64;
2726 } else {
2727 return op1->val.u64 >= op2->val.u64;
2728 }
2729 } else if (op == IR_LE) {
2730 if (IR_IS_TYPE_SIGNED(op1->type)) {
2731 return op1->val.i64 <= op2->val.i64;
2732 } else {
2733 return op1->val.u64 <= op2->val.u64;
2734 }
2735 } else if (op == IR_GT) {
2736 if (IR_IS_TYPE_SIGNED(op1->type)) {
2737 return op1->val.i64 > op2->val.i64;
2738 } else {
2739 return op1->val.u64 > op2->val.u64;
2740 }
2741 } else if (op == IR_ULT) {
2742 return op1->val.u64 < op2->val.u64;
2743 } else if (op == IR_UGE) {
2744 return op1->val.u64 >= op2->val.u64;
2745 } else if (op == IR_ULE) {
2746 return op1->val.u64 <= op2->val.u64;
2747 } else if (op == IR_UGT) {
2748 return op1->val.u64 > op2->val.u64;
2749 } else {
2750 IR_ASSERT(0);
2751 return 0;
2752 }
2753 } else if (op1->type == IR_DOUBLE) {
2754 if (op == IR_EQ) {
2755 return op1->val.d == op2->val.d;
2756 } else if (op == IR_NE) {
2757 return op1->val.d != op2->val.d;
2758 } else if (op == IR_LT) {
2759 return op1->val.d < op2->val.d;
2760 } else if (op == IR_GE) {
2761 return op1->val.d >= op2->val.d;
2762 } else if (op == IR_LE) {
2763 return op1->val.d <= op2->val.d;
2764 } else if (op == IR_GT) {
2765 return op1->val.d > op2->val.d;
2766 } else if (op == IR_ULT) {
2767 return !(op1->val.d >= op2->val.d);
2768 } else if (op == IR_UGE) {
2769 return !(op1->val.d < op2->val.d);
2770 } else if (op == IR_ULE) {
2771 return !(op1->val.d > op2->val.d);
2772 } else if (op == IR_UGT) {
2773 return !(op1->val.d <= op2->val.d);
2774 } else {
2775 IR_ASSERT(0);
2776 return 0;
2777 }
2778 } else {
2779 IR_ASSERT(op1->type == IR_FLOAT);
2780 if (op == IR_EQ) {
2781 return op1->val.f == op2->val.f;
2782 } else if (op == IR_NE) {
2783 return op1->val.f != op2->val.f;
2784 } else if (op == IR_LT) {
2785 return op1->val.f < op2->val.f;
2786 } else if (op == IR_GE) {
2787 return op1->val.f >= op2->val.f;
2788 } else if (op == IR_LE) {
2789 return op1->val.f <= op2->val.f;
2790 } else if (op == IR_GT) {
2791 return op1->val.f > op2->val.f;
2792 } else if (op == IR_ULT) {
2793 return !(op1->val.f >= op2->val.f);
2794 } else if (op == IR_UGE) {
2795 return !(op1->val.f < op2->val.f);
2796 } else if (op == IR_ULE) {
2797 return !(op1->val.f > op2->val.f);
2798 } else if (op == IR_UGT) {
2799 return !(op1->val.f <= op2->val.f);
2800 } else {
2801 IR_ASSERT(0);
2802 return 0;
2803 }
2804 }
2805}
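/*
 * Note on the floating-point cases above: IR_ULT..IR_UGT are evaluated as
 * negated ordered comparisons (IR_ULT as !(a >= b), etc.), i.e. "unordered
 * or <" in IEEE-754 terms, so they yield true when either operand is NaN.
 * A standalone check of that equivalence:
 */
#include <assert.h>
#include <math.h>

int main(void)
{
    double a = 1.0, b = 2.0;
    assert(!(a >= b) == (a < b)); /* ordered operands: ULT behaves like LT */
    assert(!(NAN >= b));          /* unordered: ULT(NaN, b) is true        */
    assert(!(NAN < b));           /* ...while ordered LT(NaN, b) is false  */
    return 0;
}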
2806
2807static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2808{
2809 ir_ref cond_ref = insn->op2;
2810 ir_insn *cond = &ctx->ir_base[cond_ref];
2811
2812 if (cond->op == IR_PHI
2813 && cond->inputs_count == 3
2814 && cond->op1 == insn->op1
2815 && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op))
2816 || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) {
2817 ir_ref merge_ref = insn->op1;
2818 ir_insn *merge = &ctx->ir_base[merge_ref];
2819
2820 if (ctx->use_lists[merge_ref].count == 2) {
2821 ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
2822 ir_insn *end1 = &ctx->ir_base[end1_ref];
2823 ir_insn *end2 = &ctx->ir_base[end2_ref];
2824
2825 if (end1->op == IR_END && end2->op == IR_END) {
2826 ir_ref if_true_ref, if_false_ref;
2827 ir_insn *if_true, *if_false;
2828 ir_op op = IR_IF_FALSE;
2829
2830 ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
2831
2832 if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) {
2833 IR_ASSERT(IR_IS_CONST_REF(cond->op3));
2834 SWAP_REFS(cond->op2, cond->op3);
2835 SWAP_REFS(merge->op1, merge->op2);
2836 SWAP_REFS(end1_ref, end2_ref);
2837 SWAP_INSNS(end1, end2);
2838 }
2839 if (ir_const_is_true(&ctx->ir_base[cond->op2])) {
2840 SWAP_REFS(if_true_ref, if_false_ref);
2841 op = IR_IF_TRUE;
2842 }
2843 if_true = &ctx->ir_base[if_true_ref];
2844 if_false = &ctx->ir_base[if_false_ref];
2845
2846 if (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)) {
2847 if (ir_const_is_true(&ctx->ir_base[cond->op3]) ^ (op == IR_IF_TRUE)) {
2848 /* Simple IF Split
2849 *
2850 * | | | |
2851 * | END | END
2852 * END / END \
2853 * | +---+ | +
2854 * | / | |
2855 * MERGE | |
2856 * | \ | |
2857 * | PHI(false, true) | |
2858 * | / | |
2859 * IF => | |
2860 * | \ | |
2861 * | +------+ | |
2862 * | IF_TRUE | BEGIN
2863 * IF_FALSE | BEGIN
2864 * | |
2865 */
2866 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2867 ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
2868
2869 MAKE_NOP(merge); CLEAR_USES(merge_ref);
2870 MAKE_NOP(cond); CLEAR_USES(cond_ref);
2871 MAKE_NOP(insn); CLEAR_USES(ref);
2872
2873 if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2874 if_false->op1 = end1_ref;
2875
2876 if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2877 if_true->op1 = end2_ref;
2878
2879 ir_bitqueue_add(worklist, if_false_ref);
2880 ir_bitqueue_add(worklist, if_true_ref);
2881
2882 return 1;
2883 } else {
2884 /* Simple IF Split
2885 *
2886 * | | | |
2887 * | END | END
2888 * END / END |
2889 * | +---+ | |
2890 * | / | |
2891 * MERGE | +
2892 * | \ | /
2893 * | PHI(false, false) | /
2894 * | / | /
2895 * IF => | /
2896 * | \ | /
2897 * | +------+ | /
2898 * | IF_TRUE | / BEGIN(unreachable)
2899 * IF_FALSE | MERGE
2900 * | |
2901 */
2902 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2903 ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
2904
2905 MAKE_NOP(merge); CLEAR_USES(merge_ref);
2906 MAKE_NOP(cond); CLEAR_USES(cond_ref);
2907 MAKE_NOP(insn); CLEAR_USES(ref);
2908
2909 if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
2910 if_false->op1 = end1_ref;
2911 if_false->op2 = end2_ref;
2912
2913 if_true->optx = IR_BEGIN;
2914 if_true->op1 = IR_UNUSED;
2915
2916 ctx->flags2 &= ~IR_CFG_REACHABLE;
2917
2918 ir_bitqueue_add(worklist, if_false_ref);
2919
2920 return 1;
2921 }
2922 }
2923
2924 /* Simple IF Split
2925 *
2926 * | | | |
2927 * | END | IF(X)
2928 * END / END / \
2929 * | +---+ | +--+ +
2930 * | / | / |
2931 * MERGE | IF_FALSE |
2932 * | \ | | |
2933 * | PHI(false, X) | | |
2934 * | / | | |
2935 * IF => | END |
2936 * | \ | | |
2937 * | +------+ | | |
2938 * | IF_TRUE | | IF_TRUE
2939 * IF_FALSE | MERGE
2940 * | |
2941 */
2942 ir_use_list_remove_all(ctx, merge_ref, cond_ref);
2943 ir_use_list_remove_all(ctx, ref, if_true_ref);
2944 if (!IR_IS_CONST_REF(cond->op3)) {
2945 ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref);
2946 }
2947 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2948 ir_use_list_add(ctx, end2_ref, if_true_ref);
2949
2950 end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
2951 end2->op2 = cond->op3;
2952
2953 merge->optx = IR_OPTX(op, IR_VOID, 1);
2954 merge->op1 = end2_ref;
2955 merge->op2 = IR_UNUSED;
2956
2957 MAKE_NOP(cond);
2958 CLEAR_USES(cond_ref);
2959
2960 insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
2961 insn->op1 = merge_ref;
2962 insn->op2 = IR_UNUSED;
2963
2964 if_true->op1 = end2_ref;
2965
2966 if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
2967 if_false->op1 = end1_ref;
2968 if_false->op2 = ref;
2969
2970 ir_bitqueue_add(worklist, if_false_ref);
2971 if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
2972 ir_bitqueue_add(worklist, end2->op1);
2973 }
2974
2975 return 1;
2976 }
2977 }
2978 }
2979
2980 return 0;
2981}
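/*
 * Illustrative source shape (not from ir_sccp.c) producing PHI(const, X)
 * under an IF: after the split, the constant arm branches directly to its
 * statically known target and only the other arm keeps a runtime test.
 */
int phi_of_const(int p, int x)
{
    int c = p ? 0 : x;   /* PHI(0, x) at the join point         */
    if (c) {             /* IF over the PHI                     */
        return 1;        /* never reached along the p-true path */
    }
    return 0;            /* the p-true path jumps here directly */
}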
2982
2983static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2984{
2985 ir_ref cond_ref = insn->op2;
2986 ir_insn *cond = &ctx->ir_base[cond_ref];
2987
2988 if (cond->op >= IR_EQ && cond->op <= IR_UGT
2989 && IR_IS_CONST_REF(cond->op2)
2990 && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
2991 && ctx->use_lists[insn->op2].count == 1) {
2992 ir_ref phi_ref = cond->op1;
2993 ir_insn *phi = &ctx->ir_base[phi_ref];
2994
2995 if (phi->op == IR_PHI
2996 && phi->inputs_count == 3
2997 && phi->op1 == insn->op1
2998 && ctx->use_lists[phi_ref].count == 1
2999 && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op))
3000 || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) {
3001 ir_ref merge_ref = insn->op1;
3002 ir_insn *merge = &ctx->ir_base[merge_ref];
3003
3004 if (ctx->use_lists[merge_ref].count == 2) {
3005 ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
3006 ir_insn *end1 = &ctx->ir_base[end1_ref];
3007 ir_insn *end2 = &ctx->ir_base[end2_ref];
3008
3009 if (end1->op == IR_END && end2->op == IR_END) {
3010 ir_ref if_true_ref, if_false_ref;
3011 ir_insn *if_true, *if_false;
3012 ir_op op = IR_IF_FALSE;
3013
3014 ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3015
3016 if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) {
3017 IR_ASSERT(IR_IS_CONST_REF(phi->op3));
3018 SWAP_REFS(phi->op2, phi->op3);
3019 SWAP_REFS(merge->op1, merge->op2);
3020 SWAP_REFS(end1_ref, end2_ref);
3021 SWAP_INSNS(end1, end2);
3022 }
3023 if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) {
3024 SWAP_REFS(if_true_ref, if_false_ref);
3025 op = IR_IF_TRUE;
3026 }
3027 if_true = &ctx->ir_base[if_true_ref];
3028 if_false = &ctx->ir_base[if_false_ref];
3029
3030 if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) {
3031 if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) {
3032 /* IF Split
3033 *
3034 * | | | |
3035 * | END | END
3036 * END / END |
3037 * | +---+ | |
3038 * | / | |
3039 * MERGE | |
3040 * | \ | |
3041 * | PHI(C1, X) | |
3042 * | | | |
3043 * | CMP(_, C2) | |
3044 * | / | |
3045 * IF => | |
3046 * | \ | |
3047 * | +------+ | |
3048 * | IF_TRUE | BEGIN
3049 * IF_FALSE | BEGIN |
3050 * | |
3051 */
3052
3053 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3054 ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
3055
3056 MAKE_NOP(merge); CLEAR_USES(merge_ref);
3057 MAKE_NOP(phi); CLEAR_USES(phi_ref);
3058 MAKE_NOP(cond); CLEAR_USES(cond_ref);
3059 MAKE_NOP(insn); CLEAR_USES(ref);
3060
3061 if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3062 if_false->op1 = end1_ref;
3063
3064 if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3065 if_true->op1 = end2_ref;
3066
3067 ir_bitqueue_add(worklist, if_false_ref);
3068 ir_bitqueue_add(worklist, if_true_ref);
3069
3070 return 1;
3071 } else {
3072 /* IF Split
3073 *
3074 * | | | |
3075 * | END | END
3076 * END / END |
3077 * | +---+ | |
3078 * | / | |
3079 * MERGE | |
3080 * | \ | |
3081 * | PHI(C1, X) | |
3082 * | | | +
3083 * | CMP(_, C2) | /
3084 * | / | /
3085 * IF => | /
3086 * | \ | /
3087 * | +------+ | /
3088 * | IF_TRUE | / BEGIN(unreachable)
3089 * IF_FALSE | MERGE |
3090 * | |
3091 */
3092
3093 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3094 ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
3095
3096 MAKE_NOP(merge); CLEAR_USES(merge_ref);
3097 MAKE_NOP(phi); CLEAR_USES(phi_ref);
3098 MAKE_NOP(cond); CLEAR_USES(cond_ref);
3099 MAKE_NOP(insn); CLEAR_USES(ref);
3100
3101 if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3102 if_false->op1 = end1_ref;
3103 if_false->op2 = end2_ref;
3104
3105 if_true->optx = IR_BEGIN;
3106 if_true->op1 = IR_UNUSED;
3107
3108 ctx->flags2 &= ~IR_CFG_REACHABLE;
3109
3110 ir_bitqueue_add(worklist, if_false_ref);
3111
3112 return 1;
3113 }
3114 } else {
3115 /* IF Split
3116 *
3117 * | | | |
3118 * | END | IF<----+
3119 * END / END / \ |
3120 * | +---+ | +--+ + |
3121 * | / | / | |
3122 * MERGE | IF_FALSE | |
3123 * | \ | | | |
3124 * | PHI(C1, X) | | | |
3125 * | | | | | |
3126 * | CMP(_, C2) | | | CMP(X, C2)
3127 * | / | | |
3128 * IF => | END |
3129 * | \ | | |
3130 * | +------+ | | |
3131 * | IF_TRUE | | IF_TRUE
3132 * IF_FALSE | MERGE
3133 * | |
3134 */
3135
3136 ir_use_list_remove_all(ctx, merge_ref, phi_ref);
3137 ir_use_list_remove_all(ctx, ref, if_true_ref);
3138 if (!IR_IS_CONST_REF(phi->op3)) {
3139 ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2);
3140 }
3141 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3142 ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref);
3143 ir_use_list_add(ctx, end2_ref, if_true_ref);
3144
3145 end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
3146 end2->op2 = insn->op2;
3147
3148 merge->optx = IR_OPTX(op, IR_VOID, 1);
3149 merge->op1 = end2_ref;
3150 merge->op2 = IR_UNUSED;
3151
3152 cond->op1 = phi->op3;
3153 MAKE_NOP(phi);
3154 CLEAR_USES(phi_ref);
3155
3156 insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3157 insn->op1 = merge_ref;
3158 insn->op2 = IR_UNUSED;
3159
3160 if_true->op1 = end2_ref;
3161
3162 if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3163 if_false->op1 = end1_ref;
3164 if_false->op2 = ref;
3165
3166 ir_bitqueue_add(worklist, if_false_ref);
3167 if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3168 ir_bitqueue_add(worklist, end2->op1);
3169 }
3170
3171 return 1;
3172 }
3173 }
3174 }
3175 }
3176 }
3177
3178 return 0;
3179}
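/*
 * The CMP variant, illustrated (not from ir_sccp.c): when the IF tests
 * CMP(PHI(C1, X), C2), the constant arm is decided at compile time through
 * ir_cmp_is_true() and the surviving arm keeps CMP(X, C2).
 */
int phi_cmp_of_const(int p, int x)
{
    int c = p ? 5 : x;   /* PHI(5, x)                            */
    if (c == 5) {        /* IF(EQ(PHI, 5))                       */
        return 1;        /* taken unconditionally when p is true */
    }
    return 0;            /* otherwise the test reduces to x == 5 */
}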
3180
3181static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist)
3182{
3183 ir_use_list *use_list = &ctx->use_lists[merge_ref];
3184
3185 if (use_list->count == 1) {
3186 ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist);
3187 } else if (use_list->count == 2) {
3188 if (merge->inputs_count == 2) {
3189 ir_ref phi_ref = ctx->use_edges[use_list->refs];
3190 ir_insn *phi = &ctx->ir_base[phi_ref];
3191
3192 ir_ref next_ref = ctx->use_edges[use_list->refs + 1];
3193 ir_insn *next = &ctx->ir_base[next_ref];
3194
3195 if (next->op == IR_PHI) {
3196 SWAP_REFS(phi_ref, next_ref);
3197 SWAP_INSNS(phi, next);
3198 }
3199
3200 if (phi->op == IR_PHI && next->op != IR_PHI) {
3201 if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) {
3202 if (next->op2 == phi_ref) {
3203 if (ir_try_split_if(ctx, next_ref, next, worklist)) {
3204 return;
3205 }
3206 } else {
3207 ir_insn *cmp = &ctx->ir_base[next->op2];
3208
3209 if (cmp->op >= IR_EQ && cmp->op <= IR_UGT
3210 && cmp->op1 == phi_ref
3211 && IR_IS_CONST_REF(cmp->op2)
3212 && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op)
3213 && ctx->use_lists[next->op2].count == 1) {
3214 if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) {
3215 return;
3216 }
3217 }
3218 }
3219 }
3220 ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist);
3221 }
3222 }
3223 }
3224}
3225
3226static ir_ref ir_find_ext_use(ir_ctx *ctx, ir_ref ref)
3227{
3228 ir_use_list *use_list = &ctx->use_lists[ref];
3229 ir_ref *p, n, use;
3230 ir_insn *use_insn;
3231
3232 for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
3233 use = *p;
3234 use_insn = &ctx->ir_base[use];
3235 if (use_insn->op == IR_SEXT || use_insn->op == IR_ZEXT) {
3236 return use;
3237 }
3238 }
3239 return IR_UNUSED;
3240}
3241
3242static void ir_iter_optimize_induction_var(ir_ctx *ctx, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
3243{
3244 ir_ref ext_ref;
3245
3246 ext_ref = ir_find_ext_use(ctx, phi_ref);
3247 if (!ext_ref) {
3248 ext_ref = ir_find_ext_use(ctx, op_ref);
3249 }
3250 if (ext_ref) {
3251 ir_try_promote_induction_var_ext(ctx, ext_ref, phi_ref, op_ref, worklist);
3252 }
3253}
3254
3255static void ir_iter_optimize_loop(ir_ctx *ctx, ir_ref loop_ref, ir_insn *loop, ir_bitqueue *worklist)
3256{
3257 ir_ref n;
3258
3259 if (loop->inputs_count != 2 || ctx->use_lists[loop_ref].count <= 1) {
3260 return;
3261 }
3262
3263 /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
3264 for (n = 0; n < ctx->use_lists[loop_ref].count; n++) {
3265 /* "use_lists" may be reallocated by ir_ext_ref() */
3266 ir_ref use = ctx->use_edges[ctx->use_lists[loop_ref].refs + n];
3267 ir_insn *use_insn = &ctx->ir_base[use];
3268
3269 if (use_insn->op == IR_PHI) {
3270 ir_ref op_ref = use_insn->op3;
3271 ir_insn *op_insn = &ctx->ir_base[op_ref];
3272
3273 if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
3274 if (op_insn->op1 == use) {
3275 if (ir_is_loop_invariant(ctx, op_insn->op2, loop_ref)) {
3276 ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3277 }
3278 } else if (op_insn->op2 == use) {
3279 if (ir_is_loop_invariant(ctx, op_insn->op1, loop_ref)) {
3280 ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3281 }
3282 }
3283 }
3284 }
3285 }
3286}
3287
3288static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref condition, bool *swap)
3289{
3290 ir_insn *condition_insn = &ctx->ir_base[condition];
3291
3292 while ((condition_insn->op == IR_BITCAST
3293 || condition_insn->op == IR_ZEXT
3294 || condition_insn->op == IR_SEXT)
3295 && ctx->use_lists[condition].count == 1) {
3296 condition = condition_insn->op1;
3297 condition_insn = &ctx->ir_base[condition];
3298 }
3299
3300 if (condition_insn->opt == IR_OPT(IR_NOT, IR_BOOL)) {
3301 *swap = 1;
3302 condition = condition_insn->op1;
3303 condition_insn = &ctx->ir_base[condition];
3304 }
3305
3306 if (condition_insn->op == IR_NE && IR_IS_CONST_REF(condition_insn->op2)) {
3307 ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3308
3309 if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3310 condition = condition_insn->op1;
3311 condition_insn = &ctx->ir_base[condition];
3312 }
3313 } else if (condition_insn->op == IR_EQ && IR_IS_CONST_REF(condition_insn->op2)) {
3314 ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3315
3316 if (condition_insn->op2 == IR_TRUE) {
3317 condition = condition_insn->op1;
3318 condition_insn = &ctx->ir_base[condition];
3319 } else if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3320 condition = condition_insn->op1;
3321 condition_insn = &ctx->ir_base[condition];
3322 *swap = !*swap;
3323 }
3324 }
3325
3326 while ((condition_insn->op == IR_BITCAST
3327 || condition_insn->op == IR_ZEXT
3328 || condition_insn->op == IR_SEXT)
3329 && ctx->use_lists[condition].count == 1) {
3330 condition = condition_insn->op1;
3331 condition_insn = &ctx->ir_base[condition];
3332 }
3333
3334 if (condition_insn->op == IR_ALLOCA || condition_insn->op == IR_VADDR) {
3335 return IR_TRUE;
3336 }
3337
3338 if (!IR_IS_CONST_REF(condition) && ctx->use_lists[condition].count > 1) {
3339 condition = ir_check_dominating_predicates(ctx, control, condition);
3340 }
3341
3342 return condition;
3343}
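/*
 * The canonicalizations above, illustrated (not from ir_sccp.c): single-use
 * BITCAST/ZEXT/SEXT wrappers are looked through, NOT flips the branch
 * targets via *swap, and NE-0 / EQ-0 / EQ-TRUE tests collapse onto the
 * underlying value. Each pair below is behaviorally identical:
 */
int ne_zero(int x)  { return (x != 0) ? 1 : 2; }  /* NE(x, 0) -> test x          */
int ne_plain(int x) { return x ? 1 : 2; }

int eq_zero(int x)  { return (x == 0) ? 1 : 2; }  /* EQ(x, 0) -> test x, swapped */
int eq_plain(int x) { return x ? 2 : 1; }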
3344
3345static void ir_iter_optimize_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3346{
3347 bool swap = 0;
3348 ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3349
3350 if (swap) {
3351 ir_use_list *use_list = &ctx->use_lists[ref];
3352 ir_ref *p, use;
3353
3354 IR_ASSERT(use_list->count == 2);
3355 p = ctx->use_edges + use_list->refs;
3356 use = *p;
3357 if (ctx->ir_base[use].op == IR_IF_TRUE) {
3358 ctx->ir_base[use].op = IR_IF_FALSE;
3359 use = *(p+1);
3360 ctx->ir_base[use].op = IR_IF_TRUE;
3361 } else {
3362 ctx->ir_base[use].op = IR_IF_TRUE;
3363 use = *(p+1);
3364 ctx->ir_base[use].op = IR_IF_FALSE;
3365 }
3366 }
3367
3368 if (IR_IS_CONST_REF(condition)) {
3369 /*
3370 * | |
3371 * IF(TRUE) => END
3372 * | \ |
3373 * | +------+ |
3374 * | IF_TRUE | BEGIN(unreachable)
3375 * IF_FALSE | BEGIN
3376 * | |
3377 */
3378 ir_ref if_true_ref, if_false_ref;
3379 ir_insn *if_true, *if_false;
3380
3381 insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3382 if (!IR_IS_CONST_REF(insn->op2)) {
3383 ir_use_list_remove_one(ctx, insn->op2, ref);
3384 ir_bitqueue_add(worklist, insn->op2);
3385 }
3386 insn->op2 = IR_UNUSED;
3387
3388 ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3389 if_true = &ctx->ir_base[if_true_ref];
3390 if_false = &ctx->ir_base[if_false_ref];
3391 if_true->op = IR_BEGIN;
3392 if_false->op = IR_BEGIN;
3393 if (ir_ref_is_true(ctx, condition)) {
3394 if_false->op1 = IR_UNUSED;
3395 ir_use_list_remove_one(ctx, ref, if_false_ref);
3396 ir_bitqueue_add(worklist, if_true_ref);
3397 } else {
3398 if_true->op1 = IR_UNUSED;
3399 ir_use_list_remove_one(ctx, ref, if_true_ref);
3400 ir_bitqueue_add(worklist, if_false_ref);
3401 }
3402 ctx->flags2 &= ~IR_CFG_REACHABLE;
3403 } else if (insn->op2 != condition) {
3404 ir_iter_update_op(ctx, ref, 2, condition, worklist);
3405 }
3406}
3407
3408static void ir_iter_optimize_guard(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3409{
3410 bool swap = 0;
3411 ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3412
3413 if (swap) {
3414 if (insn->op == IR_GUARD) {
3415 insn->op = IR_GUARD_NOT;
3416 } else {
3417 insn->op = IR_GUARD;
3418 }
3419 }
3420
3421 if (IR_IS_CONST_REF(condition)) {
3422 if (insn->op == IR_GUARD) {
3423 if (ir_ref_is_true(ctx, condition)) {
3424 ir_ref prev, next;
3425
3426remove_guard:
3427 prev = insn->op1;
3428 next = ir_next_control(ctx, ref);
3429 ctx->ir_base[next].op1 = prev;
3430 ir_use_list_remove_one(ctx, ref, next);
3431 ir_use_list_replace_one(ctx, prev, ref, next);
3432 insn->op1 = IR_UNUSED;
3433
3434 if (!IR_IS_CONST_REF(insn->op2)) {
3435 ir_use_list_remove_one(ctx, insn->op2, ref);
3436 if (ir_is_dead(ctx, insn->op2)) {
3437 /* schedule DCE */
3438 ir_bitqueue_add(worklist, insn->op2);
3439 }
3440 }
3441
3442 if (insn->op3) {
3443 /* SNAPSHOT */
3444 ir_iter_remove_insn(ctx, insn->op3, worklist);
3445 }
3446
3447 MAKE_NOP(insn);
3448 return;
3449 } else {
3450 condition = IR_FALSE;
3451 }
3452 } else {
3453 if (ir_ref_is_true(ctx, condition)) {
3454 condition = IR_TRUE;
3455 } else {
3456 goto remove_guard;
3457 }
3458 }
3459 }
3460
3461 if (insn->op2 != condition) {
3462 ir_iter_update_op(ctx, ref, 2, condition, worklist);
3463 }
3464}
3465
3466void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
3467{
3468 ir_ref i, val;
3469 ir_insn *insn;
3470
3471 while ((i = ir_bitqueue_pop(worklist)) >= 0) {
3472 insn = &ctx->ir_base[i];
3473 if (IR_IS_FOLDABLE_OP(insn->op)) {
3474 if (ctx->use_lists[i].count == 0) {
3475 if (insn->op == IR_PHI) {
3476 ir_bitqueue_add(worklist, insn->op1);
3477 }
3478 ir_iter_remove_insn(ctx, i, worklist);
3479 } else {
3480 insn = &ctx->ir_base[i];
3481 switch (insn->op) {
3482 case IR_FP2FP:
3483 if (insn->type == IR_FLOAT) {
3484 if (ir_may_promote_d2f(ctx, insn->op1)) {
3485 ir_ref ref = ir_promote_d2f(ctx, insn->op1, i, worklist);
3486 insn->op1 = ref;
3487 ir_iter_replace_insn(ctx, i, ref, worklist);
3488 break;
3489 }
3490 } else {
3491 if (ir_may_promote_f2d(ctx, insn->op1)) {
3492 ir_ref ref = ir_promote_f2d(ctx, insn->op1, i, worklist);
3493 insn->op1 = ref;
3494 ir_iter_replace_insn(ctx, i, ref, worklist);
3495 break;
3496 }
3497 }
3498 goto folding;
3499 case IR_FP2INT:
3500 if (ctx->ir_base[insn->op1].type == IR_DOUBLE) {
3501 if (ir_may_promote_d2f(ctx, insn->op1)) {
3502 insn->op1 = ir_promote_d2f(ctx, insn->op1, i, worklist);
3503 }
3504 } else {
3505 if (ir_may_promote_f2d(ctx, insn->op1)) {
3506 insn->op1 = ir_promote_f2d(ctx, insn->op1, i, worklist);
3507 }
3508 }
3509 goto folding;
3510 case IR_TRUNC:
3511 if (ir_may_promote_trunc(ctx, insn->type, insn->op1)) {
3512 ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i, worklist);
3513 insn->op1 = ref;
3514 ir_iter_replace_insn(ctx, i, ref, worklist);
3515 break;
3516 }
3517 goto folding;
3518 case IR_SEXT:
3519 case IR_ZEXT:
3520 if (ir_try_promote_ext(ctx, i, insn, worklist)) {
3521 break;
3522 }
3523 goto folding;
3524 case IR_PHI:
3525 break;
3526 default:
3527folding:
3528 ir_iter_fold(ctx, i, worklist);
3529 break;
3530 }
3531 }
3532 } else if (ir_op_flags[insn->op] & IR_OP_FLAG_BB_START) {
3533 if (!(ctx->flags & IR_OPT_CFG)) {
3534 /* pass */
3535 } else if (insn->op == IR_BEGIN) {
3536 if (insn->op1 && ctx->ir_base[insn->op1].op == IR_END) {
3537 ir_merge_blocks(ctx, insn->op1, i, worklist);
3538 }
3539 } else if (insn->op == IR_MERGE) {
3540 ir_iter_optimize_merge(ctx, i, insn, worklist);
3541 } else if (insn->op == IR_LOOP_BEGIN) {
3542 ir_iter_optimize_loop(ctx, i, insn, worklist);
3543 }
3544 } else if (ir_is_dead_load(ctx, i)) {
3545 ir_ref next;
3546
3547 /* remove LOAD from double linked control list */
3548remove_mem_insn:
3549 next = ctx->use_edges[ctx->use_lists[i].refs];
3550 IR_ASSERT(ctx->use_lists[i].count == 1);
3551 ctx->ir_base[next].op1 = insn->op1;
3552 ir_use_list_replace_one(ctx, insn->op1, i, next);
3553 insn->op1 = IR_UNUSED;
3554 ir_iter_remove_insn(ctx, i, worklist);
3555 } else if (insn->op == IR_LOAD) {
3556 val = ir_find_aliasing_load(ctx, insn->op1, insn->type, insn->op2);
3557 if (val) {
3558 ir_insn *val_insn;
3559 ir_ref prev, next;
3560
3561remove_aliased_load:
3562 prev = insn->op1;
3563 next = ir_next_control(ctx, i);
3564 ctx->ir_base[next].op1 = prev;
 3565  ir_use_list_remove_one(ctx, i, next);
 3566  ir_use_list_replace_one(ctx, prev, i, next);
3567 insn->op1 = IR_UNUSED;
3568
3569 val_insn = &ctx->ir_base[val];
3570 if (val_insn->type == insn->type) {
3571 ir_iter_replace_insn(ctx, i, val, worklist);
3572 } else {
3573 IR_ASSERT(!IR_IS_CONST_REF(insn->op2));
3574 ir_use_list_remove_one(ctx, insn->op2, i);
3575 if (ir_is_dead(ctx, insn->op2)) {
3576 /* schedule DCE */
3577 ir_bitqueue_add(worklist, insn->op2);
3578 }
3579 if (!IR_IS_CONST_REF(val)) {
3580 ir_use_list_add(ctx, val, i);
3581 }
3582 if (ir_type_size[val_insn->type] == ir_type_size[insn->type]) {
3583 /* load forwarding with bitcast (L2L) */
3584 insn->optx = IR_OPTX(IR_BITCAST, insn->type, 1);
3585 } else {
3586 /* partial load forwarding (L2L) */
3587 insn->optx = IR_OPTX(IR_TRUNC, insn->type, 1);
3588 }
3589 insn->op1 = val;
3590 insn->op2 = IR_UNUSED;
3591 ir_bitqueue_add(worklist, i);
3592 }
3593 }
3594 } else if (insn->op == IR_STORE) {
3595 if (ir_find_aliasing_store(ctx, insn->op1, insn->op2, insn->op3)) {
3596 goto remove_mem_insn;
3597 } else {
3598 ir_insn *val_insn;
3599
3600remove_bitcast:
3601 val = insn->op3;
3602 val_insn = &ctx->ir_base[val];
3603 if (val_insn->op == IR_BITCAST
3604 && ir_type_size[val_insn->type] == ir_type_size[ctx->ir_base[val_insn->op1].type]) {
3605 insn->op3 = val_insn->op1;
3606 ir_use_list_remove_one(ctx, val, i);
3607 if (ctx->use_lists[val].count == 0) {
3608 if (!IR_IS_CONST_REF(val_insn->op1)) {
3609 ir_use_list_replace_one(ctx, val_insn->op1, val, i);
3610 }
3611 ir_iter_remove_insn(ctx, val, worklist);
3612 } else {
3613 if (!IR_IS_CONST_REF(val_insn->op1)) {
3614 ir_use_list_add(ctx, val_insn->op1, i);
3615 }
3616 }
3617 }
3618 }
3619 } else if (insn->op == IR_VLOAD) {
3620 val = ir_find_aliasing_vload(ctx, insn->op1, insn->type, insn->op2);
3621 if (val) {
3622 goto remove_aliased_load;
3623 }
3624 } else if (insn->op == IR_VSTORE) {
3625 if (ir_find_aliasing_vstore(ctx, insn->op1, insn->op2, insn->op3)) {
3626 goto remove_mem_insn;
3627 } else {
3628 goto remove_bitcast;
3629 }
3630 } else if (insn->op == IR_IF) {
3631 ir_iter_optimize_if(ctx, i, insn, worklist);
3632 } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
3633 ir_iter_optimize_guard(ctx, i, insn, worklist);
3634 }
3635 }
3636}
3637
 3638int ir_sccp(ir_ctx *ctx)
3639{
3640 ir_bitqueue sccp_worklist, iter_worklist;
3641 ir_insn *_values;
3642
3643 ir_bitqueue_init(&iter_worklist, ctx->insns_count);
3644 ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
3645 _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
3646
3647 ctx->flags2 |= IR_OPT_IN_SCCP;
3648 ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);
3649 ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist);
3650 ctx->flags2 &= ~IR_OPT_IN_SCCP;
3651
3652 ir_mem_free(_values);
3653 ir_bitqueue_free(&sccp_worklist);
3654
3655 ctx->flags2 |= IR_CFG_REACHABLE;
3656
3657 ir_iter_opt(ctx, &iter_worklist);
3658
3659 ir_bitqueue_free(&iter_worklist);
3660
3661 return 1;
3662}
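/*
 * Usage sketch under stated assumptions (ir_init() and
 * ir_build_def_use_lists() are the standard ir.h/ir.c entry points
 * documented elsewhere in this set; verify the exact signatures against
 * your checkout). ir_sccp() walks ctx->use_lists, so def-use lists must
 * exist before the pass runs.
 */
void run_sccp_pass(ir_ctx *ctx)
{
    if (!ctx->use_lists) {
        ir_build_def_use_lists(ctx); /* assumed helper: fills use_lists/use_edges */
    }
    ir_sccp(ctx);                    /* SCCP + the iterative worklist pass above */
}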