LLVM/project 71582c6 llvm/lib/Target/AMDGPU AMDGPUInsertDelayAlu.cpp, llvm/test/CodeGen/AMDGPU insert-delay-alu.mir atomic_optimizations_local_pointer.ll

[AMDGPU] Remove s_delay_alu for VALU->SGPR->SALU (#127212)

Consider a VALU->SGPR->SALU dependency: a VALU instruction writes to an SGPR
and an SALU instruction reads from it. When the VALU is issued, it increments
the internal counter VA_SDST, which is used to track use of this SGPR. The
SALU will not issue until VA_SDST is zero, that is, until the VALU has
finished writing. Therefore, the delays added by s_delay_alu are not needed
in this situation.
Delta File
+413-210llvm/test/CodeGen/AMDGPU/insert-delay-alu.mir
+25-83llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+48-48llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+18-66llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+16-48llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
+36-18llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
+11-35llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll
+11-35llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll
+11-30llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
+12-28llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll
+12-28llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll
+12-28llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll
+12-28llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll
+12-28llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll
+12-28llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll
+2-36llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
+38-0llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
+12-24llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll
+12-24llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
+12-18llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+0-25llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
+5-15llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+5-15llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+10-10llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+10-10llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+0-19llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
+0-19llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
+4-14llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
+7-11llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll
+6-11llvm/test/CodeGen/AMDGPU/fma.f16.ll
+8-8llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+0-16llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ptr.ll
+5-11llvm/test/CodeGen/AMDGPU/idiv-licm.ll
+6-9llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+1-14llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
+6-9llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+4-9llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+4-9llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
+4-8llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll
+6-6llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+4-8llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+4-8llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+5-7llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+4-8llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+4-7llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll
+0-11llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ptr.ll
+4-6llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll
+4-6llvm/test/CodeGen/AMDGPU/fptrunc.ll
+4-6llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
+4-5llvm/test/CodeGen/AMDGPU/saddo.ll
+2-7llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f32.ll
+2-7llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32.ll
+2-6llvm/test/CodeGen/AMDGPU/mad_64_32.ll
+4-4llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+4-4llvm/test/CodeGen/AMDGPU/fp-classify.ll
+0-8llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+2-5llvm/test/CodeGen/AMDGPU/fract-match.ll
+2-4llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+2-4llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w64.ll
+3-3llvm/test/CodeGen/AMDGPU/bf16.ll
+2-4llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
+2-4llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
+2-3llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
+2-3llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
+0-4llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll
+0-4llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16.ll
+2-2llvm/test/CodeGen/AMDGPU/llvm.log.ll
+2-2llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+2-2llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+2-2llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+2-2llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+0-4llvm/test/CodeGen/AMDGPU/min.ll
+2-2llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
+2-2llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
+0-4llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll
+1-2llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll
+1-2llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
+1-2llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+1-1llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
+0-2llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll
+0-2llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll
+0-2llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
+1-1llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
+0-2llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
+1-1llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
+0-2llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2bf16.ll
+0-2llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+0-2llvm/test/CodeGen/AMDGPU/local-atomicrmw-fsub.ll
+0-1llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll
+0-1llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+0-1llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.m0.ll
+0-1llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
+0-1llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
+916-1,287 93 files

UnifiedSplitRaw