diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
index 424388a30e99..d1a303b41dee 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -24,12 +24,12 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_SE
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fmax_ret_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    ds_max_rtn_f32 v0, v0, v1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmax_ret_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    ds_max_rtn_f32 v0, v0, v1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: local_atomic_fmax_ret_f32:
 ; GFX11:       ; %bb.0:
@@ -96,12 +96,12 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_SE
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fmax_noret_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    ds_max_f32 v0, v1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmax_noret_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    ds_max_f32 v0, v1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: local_atomic_fmax_noret_f32:
 ; GFX11:       ; %bb.0:
@@ -168,14 +168,14 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_SE
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fmax_ret_f64:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v1
-; GFX940-NEXT:    v_mov_b32_e32 v5, v2
-; GFX940-NEXT:    ds_max_rtn_f64 v[0:1], v0, v[4:5]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmax_ret_f64:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v1
+; GFX942-NEXT:    v_mov_b32_e32 v5, v2
+; GFX942-NEXT:    ds_max_rtn_f64 v[0:1], v0, v[4:5]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: local_atomic_fmax_ret_f64:
 ; GFX11:       ; %bb.0:
@@ -244,14 +244,14 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_SE
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fmax_noret_f64:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v1
-; GFX940-NEXT:    v_mov_b32_e32 v5, v2
-; GFX940-NEXT:    ds_max_f64 v0, v[4:5]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmax_noret_f64:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v1
+; GFX942-NEXT:    v_mov_b32_e32 v5, v2
+; GFX942-NEXT:    ds_max_f64 v0, v[4:5]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: local_atomic_fmax_noret_f64:
 ; GFX11:       ; %bb.0:
@@ -320,30 +320,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB4_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -466,29 +466,29 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB5_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB5_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB5_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB5_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -626,14 +626,14 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -781,14 +781,14 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_max_f64 v[0:1], v[2:3], off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_max_f64 v[0:1], v[2:3], off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -911,30 +911,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB8_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB8_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB8_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1053,29 +1053,29 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB9_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB9_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB9_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB9_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1212,14 +1212,14 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1365,14 +1365,14 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1497,32 +1497,32 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    buffer_load_dword v0, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v3, v1, v1
-; GFX940-NEXT:  .LBB12_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v3
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    buffer_load_dword v0, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v3, v1, v1
+; GFX942-NEXT:  .LBB12_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v3
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1653,31 +1653,31 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v0
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v0, v1, v1
-; GFX940-NEXT:    v_max_f32_e32 v0, v0, v3
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v0
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v0, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1826,15 +1826,15 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1996,15 +1996,15 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll
index b52a39f1a55c..b8538cbf254f 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -24,12 +24,12 @@ define float @local_atomic_fmin_ret_f32(ptr addrspace(3) %ptr, float %val) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_SE
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fmin_ret_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    ds_min_rtn_f32 v0, v0, v1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmin_ret_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    ds_min_rtn_f32 v0, v0, v1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: local_atomic_fmin_ret_f32:
 ; GFX11:       ; %bb.0:
@@ -96,12 +96,12 @@ define void @local_atomic_fmin_noret_f32(ptr addrspace(3) %ptr, float %val) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_SE
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fmin_noret_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    ds_min_f32 v0, v1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmin_noret_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    ds_min_f32 v0, v1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: local_atomic_fmin_noret_f32:
 ; GFX11:       ; %bb.0:
@@ -168,14 +168,14 @@ define double @local_atomic_fmin_ret_f64(ptr addrspace(3) %ptr, double %val) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_SE
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fmin_ret_f64:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v1
-; GFX940-NEXT:    v_mov_b32_e32 v5, v2
-; GFX940-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[4:5]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmin_ret_f64:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v1
+; GFX942-NEXT:    v_mov_b32_e32 v5, v2
+; GFX942-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[4:5]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: local_atomic_fmin_ret_f64:
 ; GFX11:       ; %bb.0:
@@ -244,14 +244,14 @@ define void @local_atomic_fmin_noret_f64(ptr addrspace(3) %ptr, double %val) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_SE
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fmin_noret_f64:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v1
-; GFX940-NEXT:    v_mov_b32_e32 v5, v2
-; GFX940-NEXT:    ds_min_f64 v0, v[4:5]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fmin_noret_f64:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v1
+; GFX942-NEXT:    v_mov_b32_e32 v5, v2
+; GFX942-NEXT:    ds_min_f64 v0, v[4:5]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: local_atomic_fmin_noret_f64:
 ; GFX11:       ; %bb.0:
@@ -320,30 +320,30 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB4_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -466,29 +466,29 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB5_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB5_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB5_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB5_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -626,14 +626,14 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -781,14 +781,14 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_min_f64 v[0:1], v[2:3], off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_min_f64 v[0:1], v[2:3], off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -911,30 +911,30 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB8_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB8_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB8_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1053,29 +1053,29 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB9_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB9_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB9_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB9_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1212,14 +1212,14 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1365,14 +1365,14 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1497,32 +1497,32 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_m
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    buffer_load_dword v0, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v3, v1, v1
-; GFX940-NEXT:  .LBB12_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v3
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    buffer_load_dword v0, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v3, v1, v1
+; GFX942-NEXT:  .LBB12_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v3
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1653,31 +1653,31 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v0
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v0, v1, v1
-; GFX940-NEXT:    v_min_f32_e32 v0, v0, v3
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v0
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v0, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1826,15 +1826,15 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1996,15 +1996,15 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
index 0816eae28f61..714328a42d67 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
 
@@ -20,19 +20,19 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX12-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
   ; GFX12: bb.1 (%ir-block.0):
@@ -67,20 +67,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32>
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX12-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
   ; GFX12: bb.1 (%ir-block.0):
@@ -116,20 +116,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32>
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX12-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
   ; GFX12: bb.1 (%ir-block.0):
@@ -167,22 +167,22 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX12-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
   ; GFX12: bb.1 (%ir-block.0):
@@ -219,19 +219,19 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
   ; GFX12: bb.1 (%ir-block.0):
@@ -266,20 +266,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
   ; GFX12: bb.1 (%ir-block.0):
@@ -315,20 +315,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
   ; GFX12: bb.1 (%ir-block.0):
@@ -366,22 +366,22 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
   ; GFX12: bb.1 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
index c0b84c914ce5..fb95d99e9f65 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
@@ -1,24 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
   ; GFX11: bb.1 (%ir-block.0):
@@ -54,21 +54,21 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32>
 }
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
   ; GFX11: bb.1 (%ir-block.0):
@@ -106,21 +106,21 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i
 }
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
   ; GFX11: bb.1 (%ir-block.0):
@@ -158,23 +158,23 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i
 }
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
   ; GFX11: bb.1 (%ir-block.0):
@@ -216,20 +216,20 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32>
 }
 
 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
   ; GFX11: bb.1 (%ir-block.0):
@@ -265,21 +265,21 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad
 }
 
 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
   ; GFX11: bb.1 (%ir-block.0):
@@ -317,21 +317,21 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add
 }
 
 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
   ; GFX11: bb.1 (%ir-block.0):
@@ -369,23 +369,23 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add
 }
 
 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
   ; GFX11: bb.1 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
index 7c4069b4b313..f71f573e5a79 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
@@ -1,391 +1,391 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
 
 define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
 
 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
index 9514bea86e4d..3ef735ddb763 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
@@ -18,19 +18,19 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
   ret void
 }
@@ -51,20 +51,20 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
@@ -85,20 +85,20 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
@@ -121,22 +121,22 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, <
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
@@ -156,19 +156,19 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0)
   ret void
 }
@@ -189,20 +189,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
@@ -223,20 +223,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
@@ -259,22 +259,22 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll
index 632ad55fdf89..756f287b7798 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll
@@ -1,165 +1,165 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret <2 x half> %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
index 7a97ac8211f6..340e293cda7b 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
 
 define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+  ; GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
   ; GFX11: bb.1 (%ir-block.0):
@@ -30,17 +30,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
 }
 
 define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
-  ; GFX940-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+  ; GFX942-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
   ; GFX11: bb.1 (%ir-block.0):
@@ -58,16 +58,16 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
 }
 
 define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
+  ; GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
   ; GFX11: bb.1 (%ir-block.0):
@@ -84,17 +84,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da
 }
 
 define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
-  ; GFX940-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
+  ; GFX942-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
   ; GFX11: bb.1 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
index c9ab351f9401..a3562a18631d 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
@@ -1,43 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
 }
 
 define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
index 0896e4dc7af1..5909fe3d3694 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
@@ -1,66 +1,66 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
 
 define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_rtn
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr)
-  ; GFX940-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_rtn
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr)
+  ; GFX942-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_saddr_rtn(ptr inreg %ptr, <2 x half> %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_saddr_rtn
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX940-NEXT:   [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[COPY3]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr)
-  ; GFX940-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_saddr_rtn
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX942-NEXT:   [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[COPY3]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr)
+  ; GFX942-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret <2 x half> %ret
 }
 
 define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn(ptr %ptr, <2 x half> %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_no_rtn
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr)
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_no_rtn
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr)
+  ; GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret void
 }
 
 define amdgpu_ps void @flat_atomic_fadd_v2f16_saddr_no_rtn(ptr inreg %ptr, <2 x half> %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_saddr_no_rtn
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX940-NEXT:   FLAT_ATOMIC_PK_ADD_F16 [[COPY3]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr)
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_saddr_no_rtn
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX942-NEXT:   FLAT_ATOMIC_PK_ADD_F16 [[COPY3]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr)
+  ; GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret void
 }
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
index 9b9249b62b0b..00c44c27257b 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 < %s | llc -mcpu=gfx900 | FileCheck -check-prefixes=GCN,RW-FLAT %s
 ; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s
-; RUN: opt -passes=amdgpu-attributor -mcpu=gfx940 < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s
+; RUN: opt -passes=amdgpu-attributor -mcpu=gfx942 < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s
 
 target triple = "amdgcn-amd-amdhsa"
 
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index 7cafa2f608a4..6a2b696a7fe7 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
 
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX9 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX12 %s
 
@@ -46,19 +46,19 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_sindex_kernel:
 ; GFX11:       ; %bb.0: ; %bb
@@ -122,19 +122,19 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: store_load_sindex_kernel:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; UNALIGNED_GFX940-NEXT:    s_and_b32 s0, s0, 15
-; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: store_load_sindex_kernel:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_load_dword s0, s[4:5], 0x0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; UNALIGNED_GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; UNALIGNED_GFX942-NEXT:    s_and_b32 s0, s0, 15
+; UNALIGNED_GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_sindex_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -204,18 +204,18 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_vindex_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_vindex_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_sub_u32_e32 v0, 0, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_vindex_kernel:
 ; GFX11:       ; %bb.0: ; %bb
@@ -274,18 +274,18 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: store_load_vindex_kernel:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; UNALIGNED_GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: store_load_vindex_kernel:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; UNALIGNED_GFX942-NEXT:    v_sub_u32_e32 v0, 0, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_vindex_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -356,19 +356,19 @@ define void @store_load_vindex_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_vindex_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_vindex_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX942-NEXT:    v_add_u32_e32 v1, s32, v1
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: store_load_vindex_foo:
 ; GFX11:       ; %bb.0: ; %bb
@@ -432,19 +432,19 @@ define void @store_load_vindex_foo(i32 %idx) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: store_load_vindex_foo:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: store_load_vindex_foo:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v1, s32, v1
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; UNALIGNED_GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_vindex_foo:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -503,13 +503,13 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) {
 ; GFX10-NEXT:    scratch_store_dword v0, v1, off offset:4
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: private_ptr_foo:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 0x41200000
-; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: private_ptr_foo:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x41200000
+; GFX942-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: private_ptr_foo:
 ; GFX11:       ; %bb.0:
@@ -544,13 +544,13 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) {
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v1, off offset:4
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: private_ptr_foo:
-; UNALIGNED_GFX940:       ; %bb.0:
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 0x41200000
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: private_ptr_foo:
+; UNALIGNED_GFX942:       ; %bb.0:
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v1, 0x41200000
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: private_ptr_foo:
 ; UNALIGNED_GFX11:       ; %bb.0:
@@ -617,23 +617,23 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_small_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
-; GFX940-NEXT:    scratch_load_dword v0, off, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_addk_i32 s1, 0x100
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s0, 0x100
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_small_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX942-NEXT:    scratch_load_dword v0, off, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    s_addk_i32 s1, 0x100
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s0, 0x100
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_sindex_small_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
@@ -713,23 +713,23 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: store_load_sindex_small_offset_kernel:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; UNALIGNED_GFX940-NEXT:    s_and_b32 s0, s0, 15
-; UNALIGNED_GFX940-NEXT:    s_addk_i32 s1, 0x100
-; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_addk_i32 s0, 0x100
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: store_load_sindex_small_offset_kernel:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_load_dword s0, s[4:5], 0x0
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; UNALIGNED_GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; UNALIGNED_GFX942-NEXT:    s_and_b32 s0, s0, 15
+; UNALIGNED_GFX942-NEXT:    s_addk_i32 s1, 0x100
+; UNALIGNED_GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_addk_i32 s0, 0x100
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_sindex_small_offset_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -819,21 +819,21 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_vindex_small_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    scratch_load_dword v1, off, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, off offset:256 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v0, 0x100, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_vindex_small_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    scratch_load_dword v1, off, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX942-NEXT:    v_sub_u32_e32 v0, 0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, off offset:256 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v0, 0x100, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_vindex_small_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
@@ -906,21 +906,21 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: store_load_vindex_small_offset_kernel:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v1, off, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off offset:256 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v0, 0x100, v0
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: store_load_vindex_small_offset_kernel:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v1, off, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_sub_u32_e32 v0, 0, v0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v1, v2, off offset:256 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v0, 0x100, v0
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_vindex_small_offset_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -1007,22 +1007,22 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_vindex_small_offset_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
-; GFX940-NEXT:    v_add_u32_e32 v1, 0x100, v1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_vindex_small_offset_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX942-NEXT:    v_add_u32_e32 v1, s32, v1
+; GFX942-NEXT:    v_add_u32_e32 v1, 0x100, v1
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: store_load_vindex_small_offset_foo:
 ; GFX11:       ; %bb.0: ; %bb
@@ -1098,22 +1098,22 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: store_load_vindex_small_offset_foo:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 0x100, v1
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: store_load_vindex_small_offset_foo:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v1, s32, v1
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v1, 0x100, v1
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; UNALIGNED_GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_vindex_small_offset_foo:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -1206,23 +1206,23 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_large_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
-; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_addk_i32 s1, 0x4004
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s0, 0x4004
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_large_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX942-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    s_addk_i32 s1, 0x4004
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s0, 0x4004
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_sindex_large_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
@@ -1302,23 +1302,23 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: store_load_sindex_large_offset_kernel:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; UNALIGNED_GFX940-NEXT:    s_and_b32 s0, s0, 15
-; UNALIGNED_GFX940-NEXT:    s_addk_i32 s1, 0x4004
-; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_addk_i32 s0, 0x4004
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: store_load_sindex_large_offset_kernel:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_load_dword s0, s[4:5], 0x0
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; UNALIGNED_GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; UNALIGNED_GFX942-NEXT:    s_and_b32 s0, s0, 15
+; UNALIGNED_GFX942-NEXT:    s_addk_i32 s1, 0x4004
+; UNALIGNED_GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_addk_i32 s0, 0x4004
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_sindex_large_offset_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -1408,22 +1408,22 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_vindex_large_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    s_movk_i32 s0, 0x4004
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v0, 0x4004, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_vindex_large_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX942-NEXT:    v_sub_u32_e32 v0, 0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    s_movk_i32 s0, 0x4004
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v0, 0x4004, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_vindex_large_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
@@ -1497,22 +1497,22 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: store_load_vindex_large_offset_kernel:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; UNALIGNED_GFX940-NEXT:    s_movk_i32 s0, 0x4004
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v0, 0x4004, v0
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: store_load_vindex_large_offset_kernel:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_sub_u32_e32 v0, 0, v0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x4004
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v1, v2, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v0, 0x4004, v0
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_vindex_large_offset_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -1600,23 +1600,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_vindex_large_offset_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
-; GFX940-NEXT:    v_add_u32_e32 v1, 0x4004, v1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX940-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_vindex_large_offset_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; GFX942-NEXT:    v_add_u32_e32 v1, s32, v1
+; GFX942-NEXT:    v_add_u32_e32 v1, 0x4004, v1
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX942-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: store_load_vindex_large_offset_foo:
 ; GFX11:       ; %bb.0: ; %bb
@@ -1693,23 +1693,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: store_load_vindex_large_offset_foo:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 0x4004, v1
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; UNALIGNED_GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: store_load_vindex_large_offset_foo:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v1, s32, v1
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v1, 0x4004, v1
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; UNALIGNED_GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s32, 0x4004
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_vindex_large_offset_foo:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -1796,19 +1796,19 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_large_imm_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    v_mov_b32_e32 v0, 13
-; GFX940-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX940-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_large_imm_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; GFX942-NEXT:    s_movk_i32 s0, 0x3e80
+; GFX942-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_add_i32 s0, s0, 4
+; GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_large_imm_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
@@ -1870,19 +1870,19 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: store_load_large_imm_offset_kernel:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 13
-; UNALIGNED_GFX940-NEXT:    s_movk_i32 s0, 0x3e80
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX940-NEXT:    s_add_i32 s0, s0, 4
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x3e80
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s0, 4
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -1952,21 +1952,21 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_large_imm_offset_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX940-NEXT:    v_mov_b32_e32 v0, 13
-; GFX940-NEXT:    s_add_i32 s1, s32, s0
-; GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_add_i32 s0, s1, 4
-; GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_large_imm_offset_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0x3e80
+; GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; GFX942-NEXT:    s_add_i32 s1, s32, s0
+; GFX942-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_add_i32 s0, s1, 4
+; GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: store_load_large_imm_offset_foo:
 ; GFX11:       ; %bb.0: ; %bb
@@ -2033,21 +2033,21 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: store_load_large_imm_offset_foo:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_movk_i32 s0, 0x3e80
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 13
-; UNALIGNED_GFX940-NEXT:    s_add_i32 s1, s32, s0
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX940-NEXT:    s_add_i32 s0, s1, 4
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x3e80
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; UNALIGNED_GFX942-NEXT:    s_add_i32 s1, s32, s0
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s1, 4
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_foo:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -2123,18 +2123,18 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_vidx_sidx_offset:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
-; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
-; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:1024 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:1024 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_vidx_sidx_offset:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
+; GFX942-NEXT:    scratch_store_dword v0, v1, off offset:1024 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:1024 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_vidx_sidx_offset:
 ; GFX11:       ; %bb.0: ; %bb
@@ -2192,18 +2192,18 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: store_load_vidx_sidx_offset:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
-; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v0, v1, off offset:1024 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, off offset:1024 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: store_load_vidx_sidx_offset:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_load_dword s0, s[4:5], 0x0
+; UNALIGNED_GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; UNALIGNED_GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v0, v1, off offset:1024 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, v0, off offset:1024 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_vidx_sidx_offset:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -2264,15 +2264,15 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_i64_aligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 15
-; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_i64_aligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 15
+; GFX942-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: store_load_i64_aligned:
 ; GFX11:       ; %bb.0: ; %bb
@@ -2323,15 +2323,15 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: store_load_i64_aligned:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b64_e32 v[2:3], 15
-; UNALIGNED_GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: store_load_i64_aligned:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b64_e32 v[2:3], 15
+; UNALIGNED_GFX942-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_i64_aligned:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -2388,15 +2388,15 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_i64_unaligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 15
-; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_i64_unaligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 15
+; GFX942-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: store_load_i64_unaligned:
 ; GFX11:       ; %bb.0: ; %bb
@@ -2525,59 +2525,59 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: store_load_i64_unaligned:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v4, 15
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 4, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v2, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v3, 1, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v0, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v4, 0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v6, 6, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v3, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v5, 3, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v2, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v5, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v7, 5, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v1, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v7, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v8, 7, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v6, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v8, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v0, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr7
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr2
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr6
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr1
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr3
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr5
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr8
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr0
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v5, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v1, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v7, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v6, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v8, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: store_load_i64_unaligned:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v4, 15
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v1, 4, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v2, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v3, 1, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v0, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v4, 0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v6, 6, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v3, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v5, 3, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v2, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v5, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v7, 5, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v1, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v7, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v8, 7, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v6, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v8, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v4, v0, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr7
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr2
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr6
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr1
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr3
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr5
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr8
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr0
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v4, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v4, v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v4, v5, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v4, v1, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v4, v7, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v4, v6, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v4, v8, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_i64_unaligned:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -2703,20 +2703,20 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_v3i32_unaligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b32 s2, 3
-; GFX940-NEXT:    s_mov_b32 s1, 2
-; GFX940-NEXT:    s_mov_b32 s0, 1
-; GFX940-NEXT:    v_mov_b32_e32 v4, s2
-; GFX940-NEXT:    v_mov_b32_e32 v3, s1
-; GFX940-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-NEXT:    scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_v3i32_unaligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b32 s2, 3
+; GFX942-NEXT:    s_mov_b32 s1, 2
+; GFX942-NEXT:    s_mov_b32 s0, 1
+; GFX942-NEXT:    v_mov_b32_e32 v4, s2
+; GFX942-NEXT:    v_mov_b32_e32 v3, s1
+; GFX942-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-NEXT:    scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: store_load_v3i32_unaligned:
 ; GFX11:       ; %bb.0: ; %bb
@@ -2900,85 +2900,85 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: store_load_v3i32_unaligned:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v3, 1
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 2
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v2, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v4, 1, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v0, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v3, 0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v6, 4, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v7, 6, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v9, 8, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v10, 10, v0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v12, 3
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v4, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v5, 3, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v2, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v5, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v8, 5, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v6, v1, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v8, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 7, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v7, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v1, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v11, 9, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v9, v12, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v11, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v12, 11, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v10, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v12, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v0, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr12
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr4
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr11
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr7
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr6
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr10
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr5
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr9
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr1
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr8
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr2
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr0
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v5, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v6, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v8, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v7, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v1, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v9, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v11, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v10, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v12, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: store_load_v3i32_unaligned:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v3, 1
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v1, 2
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v2, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v4, 1, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v0, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v3, 0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v6, 4, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v7, 6, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v9, 8, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v10, 10, v0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v12, 3
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v4, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v5, 3, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v2, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v5, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v8, 5, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v6, v1, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v8, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v1, 7, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v7, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v1, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v11, 9, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v9, v12, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v11, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v12, 11, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v10, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v12, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v0, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr12
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr4
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr11
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr7
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr6
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr10
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr5
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr9
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr1
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr8
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr2
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr0
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v5, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v6, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v8, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v7, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v1, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v9, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v11, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v10, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v12, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_v3i32_unaligned:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -3146,20 +3146,20 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_v4i32_unaligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b32 s3, 4
-; GFX940-NEXT:    s_mov_b32 s2, 3
-; GFX940-NEXT:    s_mov_b32 s1, 2
-; GFX940-NEXT:    s_mov_b32 s0, 1
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
-; GFX940-NEXT:    scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_v4i32_unaligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b32 s3, 4
+; GFX942-NEXT:    s_mov_b32 s2, 3
+; GFX942-NEXT:    s_mov_b32 s1, 2
+; GFX942-NEXT:    s_mov_b32 s0, 1
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-NEXT:    scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: store_load_v4i32_unaligned:
 ; GFX11:       ; %bb.0: ; %bb
@@ -3390,109 +3390,109 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) {
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; UNALIGNED_GFX940-LABEL: store_load_v4i32_unaligned:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v3, 1
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 2
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v2, 2, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v4, 1, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v0, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v3, 0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v6, 4
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v7, 4, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v8, 6, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v10, 8, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v11, 10, v0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v13, 3
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v14, 12, v0
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v15, 14, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v4, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v5, 3, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v2, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v5, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v9, 5, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v7, v1, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v9, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 7, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v8, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v1, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v12, 9, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v10, v13, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v12, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v13, 11, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v11, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v13, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v16, 13, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v14, v6, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v16, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v6, 15, v0
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v15, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_store_byte v6, v3, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v0, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v4, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v5, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v7, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v9, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v8, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v1, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v10, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v12, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v11, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v13, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v14, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v16, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v15, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr2
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr1
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr9
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr16
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr11
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr4
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr15
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr10
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr7
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr13
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr5
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr14
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr12
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr8
-; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr0
-; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v0, v6, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
+; UNALIGNED_GFX942-LABEL: store_load_v4i32_unaligned:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v3, 1
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v1, 2
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v2, 2, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v4, 1, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v0, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v3, 0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v6, 4
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v7, 4, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v8, 6, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v10, 8, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v11, 10, v0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v13, 3
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v14, 12, v0
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v15, 14, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v4, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v5, 3, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v2, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v5, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v9, 5, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v7, v1, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v9, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v1, 7, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v8, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v1, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v12, 9, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v10, v13, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v12, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v13, 11, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v11, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v13, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v16, 13, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v14, v6, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v16, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v6, 15, v0
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v15, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_store_byte v6, v3, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v0, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v4, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v5, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v7, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v9, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v8, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v1, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v10, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v12, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v11, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v13, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v14, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v16, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v3, v15, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr2
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr1
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr9
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr16
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr11
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr4
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr15
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr10
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr7
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr13
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr5
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr14
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr12
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr8
+; UNALIGNED_GFX942-NEXT:    ; kill: killed $vgpr0
+; UNALIGNED_GFX942-NEXT:    scratch_load_ubyte v0, v6, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; UNALIGNED_GFX11-LABEL: store_load_v4i32_unaligned:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -3685,13 +3685,13 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
 ; GFX10-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_large_offset:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_add_u32 s0, s0, 0xffe8
-; GFX940-NEXT:    scratch_load_dword v2, off, s0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_large_offset:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_add_u32 s0, s0, 0xffe8
+; GFX942-NEXT:    scratch_load_dword v2, off, s0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: sgpr_base_large_offset:
 ; GFX11:       ; %bb.0: ; %entry
@@ -3730,13 +3730,13 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
 ; UNALIGNED_GFX10-NEXT:    global_store_dword v[0:1], v2, off
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: sgpr_base_large_offset:
-; UNALIGNED_GFX940:       ; %bb.0: ; %entry
-; UNALIGNED_GFX940-NEXT:    s_add_u32 s0, s0, 0xffe8
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v2, off, s0
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: sgpr_base_large_offset:
+; UNALIGNED_GFX942:       ; %bb.0: ; %entry
+; UNALIGNED_GFX942-NEXT:    s_add_u32 s0, s0, 0xffe8
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v2, off, s0
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %entry
@@ -3784,14 +3784,14 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
 ; GFX10-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_large_offset_split:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_and_b32 s0, s0, -4
-; GFX940-NEXT:    s_add_u32 s0, s0, 0x100ffe8
-; GFX940-NEXT:    scratch_load_dword v2, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_large_offset_split:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_and_b32 s0, s0, -4
+; GFX942-NEXT:    s_add_u32 s0, s0, 0x100ffe8
+; GFX942-NEXT:    scratch_load_dword v2, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: sgpr_base_large_offset_split:
 ; GFX11:       ; %bb.0: ; %entry
@@ -3837,14 +3837,14 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
 ; UNALIGNED_GFX10-NEXT:    global_store_dword v[0:1], v2, off
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: sgpr_base_large_offset_split:
-; UNALIGNED_GFX940:       ; %bb.0: ; %entry
-; UNALIGNED_GFX940-NEXT:    s_and_b32 s0, s0, -4
-; UNALIGNED_GFX940-NEXT:    s_add_u32 s0, s0, 0x100ffe8
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v2, off, s0 sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: sgpr_base_large_offset_split:
+; UNALIGNED_GFX942:       ; %bb.0: ; %entry
+; UNALIGNED_GFX942-NEXT:    s_and_b32 s0, s0, -4
+; UNALIGNED_GFX942-NEXT:    s_add_u32 s0, s0, 0x100ffe8
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v2, off, s0 sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset_split:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %entry
@@ -3902,15 +3902,15 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 0xffe8
-; GFX940-NEXT:    v_add3_u32 v0, s0, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    v_add_u32_e32 v0, s1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0xffe8
+; GFX942-NEXT:    v_add3_u32 v0, s0, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
 ; GFX11:       ; %bb.0: ; %bb
@@ -3955,15 +3955,15 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 0xffe8
-; UNALIGNED_GFX940-NEXT:    v_add3_u32 v0, s0, v0, v1
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v0, s1, v0
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v1, 0xffe8
+; UNALIGNED_GFX942-NEXT:    v_add3_u32 v0, s0, v0, v1
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -4015,14 +4015,14 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
-; GFX940-NEXT:    v_add3_u32 v0, s0, v0, -16
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    v_add_u32_e32 v0, s1, v0
+; GFX942-NEXT:    v_add3_u32 v0, s0, v0, -16
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
 ; GFX11:       ; %bb.0: ; %bb
@@ -4066,14 +4066,14 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
-; UNALIGNED_GFX940:       ; %bb.0: ; %bb
-; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
-; UNALIGNED_GFX940-NEXT:    v_add3_u32 v0, s0, v0, -16
-; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
+; UNALIGNED_GFX942:       ; %bb.0: ; %bb
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v0, s1, v0
+; UNALIGNED_GFX942-NEXT:    v_add3_u32 v0, s0, v0, -16
+; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
@@ -4122,13 +4122,13 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ; GFX10-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_negative_offset:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_add_u32 s0, s0, 0xffffffe8
-; GFX940-NEXT:    scratch_load_dword v2, off, s0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_negative_offset:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_add_u32 s0, s0, 0xffffffe8
+; GFX942-NEXT:    scratch_load_dword v2, off, s0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: sgpr_base_negative_offset:
 ; GFX11:       ; %bb.0: ; %entry
@@ -4165,13 +4165,13 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ; UNALIGNED_GFX10-NEXT:    global_store_dword v[0:1], v2, off
 ; UNALIGNED_GFX10-NEXT:    s_endpgm
 ;
-; UNALIGNED_GFX940-LABEL: sgpr_base_negative_offset:
-; UNALIGNED_GFX940:       ; %bb.0: ; %entry
-; UNALIGNED_GFX940-NEXT:    s_add_u32 s0, s0, 0xffffffe8
-; UNALIGNED_GFX940-NEXT:    scratch_load_dword v2, off, s0
-; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
-; UNALIGNED_GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; UNALIGNED_GFX940-NEXT:    s_endpgm
+; UNALIGNED_GFX942-LABEL: sgpr_base_negative_offset:
+; UNALIGNED_GFX942:       ; %bb.0: ; %entry
+; UNALIGNED_GFX942-NEXT:    s_add_u32 s0, s0, 0xffffffe8
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v2, off, s0
+; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
 ; UNALIGNED_GFX11-LABEL: sgpr_base_negative_offset:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %entry
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll
deleted file mode 100644
index 676298670f1f..000000000000
--- llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll
+++ /dev/null
@@ -1,132 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefix=GFX940
-
-define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
-; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
-  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
-  ret void
-}
-
-define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
-; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
-  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
-  ret void
-}
-
-define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
-; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
-  ret float %ret
-}
-
-define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
-; GFX940-LABEL: local_atomic_fadd_ret_v2f16_offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
-  %result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
-  ret <2 x half> %result
-}
-
-define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
-; GFX940-LABEL: local_atomic_fadd_noret_v2f16_offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    ds_pk_add_f16 v0, v1 offset:65532
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
-  %unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
-  ret void
-}
-
-define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
-; GFX940-LABEL: global_atomic_fadd_ret_v2f16_agent_offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
-  %result = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
-  ret <2 x half> %result
-}
-
-define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
-; GFX940-LABEL: global_atomic_fadd_noret_v2f16_agent_offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:1024
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
-  %unused = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
-  ret void
-}
-
-define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
-; GFX940-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
-  %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
-  ret <2 x half> %result
-}
-
-define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
-; GFX940-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
-  %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
-  ret void
-}
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }
-
-!0 = !{}
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
new file mode 100644
index 000000000000..6792612ded36
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942
+
+define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
+; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
+  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
+  ret void
+}
+
+define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
+; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
+  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
+  ret void
+}
+
+define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
+; GFX942-LABEL: flat_atomic_fadd_f32_rtn_pat:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
+  ret float %ret
+}
+
+define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
+; GFX942-LABEL: local_atomic_fadd_ret_v2f16_offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
+  %result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
+  ret <2 x half> %result
+}
+
+define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
+; GFX942-LABEL: local_atomic_fadd_noret_v2f16_offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    ds_pk_add_f16 v0, v1 offset:65532
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
+  %unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
+  ret void
+}
+
+define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
+; GFX942-LABEL: global_atomic_fadd_ret_v2f16_agent_offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
+  %result = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
+  ret <2 x half> %result
+}
+
+define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
+; GFX942-LABEL: global_atomic_fadd_noret_v2f16_agent_offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:1024
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
+  %unused = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
+  ret void
+}
+
+define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
+; GFX942-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
+  %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
+  ret <2 x half> %result
+}
+
+define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
+; GFX942-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
+  %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
+  ret void
+}
+
+attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }
+
+!0 = !{}
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index 8ff2f59964ab..7c54a8abe989 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX90A
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX940
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX942
 
 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
@@ -27,16 +27,16 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub
 ; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_add_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_add_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -50,12 +50,12 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -78,20 +78,20 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -110,16 +110,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8)
 ; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -133,12 +133,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -161,20 +161,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -193,16 +193,16 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d
 ; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_add_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_add_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -216,12 +216,12 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -244,20 +244,20 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -276,16 +276,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(
 ; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -299,12 +299,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -327,20 +327,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -359,16 +359,16 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub
 ; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_min_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_min_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -382,12 +382,12 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -410,20 +410,20 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -442,16 +442,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8)
 ; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -465,12 +465,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -493,20 +493,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -525,16 +525,16 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d
 ; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_min_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_min_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -548,12 +548,12 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -576,20 +576,20 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -608,16 +608,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(
 ; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -631,12 +631,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -659,20 +659,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -691,16 +691,16 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub
 ; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_max_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_max_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -714,12 +714,12 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -742,20 +742,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -774,16 +774,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8)
 ; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -797,12 +797,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -825,20 +825,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -857,16 +857,16 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d
 ; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_max_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_max_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -880,12 +880,12 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -908,20 +908,20 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -940,16 +940,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(
 ; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -963,12 +963,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -991,20 +991,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -1035,27 +1035,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
 ; GFX90A-NEXT:  .LBB36_2:
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB36_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
-; GFX940-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:  .LBB36_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB36_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:  .LBB36_2:
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1083,27 +1083,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
 ; GFX90A-NEXT:  .LBB37_2:
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB37_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
-; GFX940-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:  .LBB37_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_agent:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB37_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:  .LBB37_2:
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1133,27 +1133,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
 ; GFX90A-NEXT:  .LBB38_2:
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB38_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
-; GFX940-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:  .LBB38_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_system:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB38_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:  .LBB38_2:
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1181,27 +1181,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
 ; GFX90A-NEXT:  .LBB39_2:
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB39_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
-; GFX940-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:  .LBB39_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_flush:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB39_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:  .LBB39_2:
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1220,15 +1220,15 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1245,15 +1245,15 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1272,15 +1272,15 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_system:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat_system:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1308,27 +1308,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
 ; GFX90A-NEXT:  .LBB43_2:
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB43_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
-; GFX940-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:  .LBB43_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB43_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:  .LBB43_2:
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1349,17 +1349,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1378,17 +1378,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1409,17 +1409,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_system:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1438,15 +1438,15 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1463,15 +1463,15 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1491,16 +1491,16 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1519,17 +1519,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1556,25 +1556,25 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
 ; GFX90A-NEXT:  .LBB51_2:
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_noret_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB51_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dword s2, s[4:5], 0x24
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
-; GFX940-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s2
-; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:  .LBB51_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_f64_noret_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB51_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x24
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s2
+; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:  .LBB51_2:
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1601,25 +1601,25 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
 ; GFX90A-NEXT:  .LBB52_2:
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB52_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dword s2, s[4:5], 0x24
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
-; GFX940-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s2
-; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:  .LBB52_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_f64_noret_pat_flush:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB52_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x24
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s2
+; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:  .LBB52_2:
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1646,25 +1646,25 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
 ; GFX90A-NEXT:  .LBB53_2:
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB53_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dword s2, s[4:5], 0x24
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
-; GFX940-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s2
-; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:  .LBB53_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB53_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x24
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT:    v_mul_f64 v[0:1], v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s2
+; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:  .LBB53_2:
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1680,13 +1680,13 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_rtn_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fadd_f64_rtn_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
   ret double %ret
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index 6459110dd8bb..dbfb4e6ebe7e 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
 
@@ -28,16 +28,16 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1)
   ; GFX90A-NEXT:   GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
   ; GFX90A-NEXT:   S_ENDPGM 0
   ;
-  ; GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
+  ; GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw
   ; GFX11: bb.1 (%ir-block.0):
@@ -198,77 +198,77 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
   ; GFX90A-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_ENDPGM 0
   ;
-  ; GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
-  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
-  ; GFX940-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.2 (%ir-block.5):
-  ; GFX940-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec
-  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
-  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
-  ; GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1
-  ; GFX940-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
-  ; GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; GFX940-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY7]], [[COPY8]], implicit $exec
-  ; GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
-  ; GFX940-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY9]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
-  ; GFX940-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-  ; GFX940-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GFX940-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY10]], [[DEF]], implicit $exec
-  ; GFX940-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
-  ; GFX940-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
-  ; GFX940-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
-  ; GFX940-NEXT:   [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec
-  ; GFX940-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; GFX940-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY18]], implicit $exec
-  ; GFX940-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.3
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.3 (%ir-block.31):
-  ; GFX940-NEXT:   successors: %bb.4(0x80000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.4.Flow:
-  ; GFX940-NEXT:   successors: %bb.5(0x80000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.5 (%ir-block.33):
-  ; GFX940-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
+  ; GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
+  ; GFX942-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.2 (%ir-block.5):
+  ; GFX942-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec
+  ; GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
+  ; GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
+  ; GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1
+  ; GFX942-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+  ; GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; GFX942-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY7]], [[COPY8]], implicit $exec
+  ; GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+  ; GFX942-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY9]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
+  ; GFX942-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+  ; GFX942-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GFX942-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY10]], [[DEF]], implicit $exec
+  ; GFX942-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
+  ; GFX942-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
+  ; GFX942-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
+  ; GFX942-NEXT:   [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec
+  ; GFX942-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; GFX942-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY18]], implicit $exec
+  ; GFX942-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.3
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.3 (%ir-block.31):
+  ; GFX942-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX942-NEXT:   GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.4.Flow:
+  ; GFX942-NEXT:   successors: %bb.5(0x80000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.5 (%ir-block.33):
+  ; GFX942-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
   ; GFX11: bb.1 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index e935245e30f1..6cd03a2ecaff 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX90A %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s
 
@@ -17,17 +17,17 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %p
   ; GFX90A-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]]
   ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
-  ; GFX940-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
-  ; GFX940-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX942-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
+  ; GFX942-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw
   ; GFX11: bb.1 (%ir-block.0):
@@ -136,96 +136,96 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
   ; GFX90A-NEXT:   $vgpr0 = COPY [[PHI]]
   ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
-  ; GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
-  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-  ; GFX940-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
-  ; GFX940-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.2 (%ir-block.5):
-  ; GFX940-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec
-  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
-  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
-  ; GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1
-  ; GFX940-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
-  ; GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; GFX940-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY7]], [[COPY8]], implicit $exec
-  ; GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
-  ; GFX940-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY9]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
-  ; GFX940-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-  ; GFX940-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GFX940-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY10]], [[DEF1]], implicit $exec
-  ; GFX940-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
-  ; GFX940-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
-  ; GFX940-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
-  ; GFX940-NEXT:   [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec
-  ; GFX940-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; GFX940-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec
-  ; GFX940-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.3
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.3 (%ir-block.32):
-  ; GFX940-NEXT:   successors: %bb.5(0x80000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
-  ; GFX940-NEXT:   S_BRANCH %bb.5
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.4.Flow:
-  ; GFX940-NEXT:   successors: %bb.6(0x80000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI %42, %bb.5, [[DEF]], %bb.1
-  ; GFX940-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.6
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.5 (%ir-block.35):
-  ; GFX940-NEXT:   successors: %bb.4(0x80000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
-  ; GFX940-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec
-  ; GFX940-NEXT:   [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec
-  ; GFX940-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY20]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX940-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_6]], 0, [[COPY21]], [[V_CMP_EQ_U32_e64_]], implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.4
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.6 (%ir-block.41):
-  ; GFX940-NEXT:   $vgpr0 = COPY [[PHI]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX942-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
+  ; GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; GFX942-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
+  ; GFX942-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.2 (%ir-block.5):
+  ; GFX942-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec
+  ; GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
+  ; GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
+  ; GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1
+  ; GFX942-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+  ; GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; GFX942-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY7]], [[COPY8]], implicit $exec
+  ; GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+  ; GFX942-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY9]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
+  ; GFX942-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+  ; GFX942-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GFX942-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY10]], [[DEF1]], implicit $exec
+  ; GFX942-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
+  ; GFX942-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
+  ; GFX942-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
+  ; GFX942-NEXT:   [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec
+  ; GFX942-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; GFX942-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec
+  ; GFX942-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.3
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.3 (%ir-block.32):
+  ; GFX942-NEXT:   successors: %bb.5(0x80000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX942-NEXT:   [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
+  ; GFX942-NEXT:   S_BRANCH %bb.5
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.4.Flow:
+  ; GFX942-NEXT:   successors: %bb.6(0x80000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI %42, %bb.5, [[DEF]], %bb.1
+  ; GFX942-NEXT:   SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.6
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.5 (%ir-block.35):
+  ; GFX942-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
+  ; GFX942-NEXT:   SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec
+  ; GFX942-NEXT:   [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec
+  ; GFX942-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY20]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX942-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_6]], 0, [[COPY21]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.4
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.6 (%ir-block.41):
+  ; GFX942-NEXT:   $vgpr0 = COPY [[PHI]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ;
   ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
   ; GFX11: bb.1 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
index 49c5dc7ed5a9..e3bd3d4f2258 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX90A %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
 
 define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) {
   ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
@@ -35,18 +35,18 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1)
   ; GFX90A-NEXT:   SI_END_CF [[PHI2]], implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_ENDPGM 0
   ;
-  ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX942-NEXT:   GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
+  ; GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
   ret void
 }
@@ -91,66 +91,66 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %
   ; GFX90A-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
   ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   ;
-  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
-  ; GFX940: bb.1 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
-  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
-  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
-  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
-  ; GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
-  ; GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX942-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
+  ; GFX942: bb.1 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX942-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
+  ; GFX942-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
+  ; GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
+  ; GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+  ; GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+  ; GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
   ret double %ret
 }
 
 define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
   ret void
 }
 
 define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
   ret double %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
index 1317770ad834..9c0db4cd162f 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
   ; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn
@@ -15,16 +15,16 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2
   ; GFX908-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret void
 }
@@ -42,17 +42,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in
   ; GFX908-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret void
 }
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
index a65fc6c0c4cf..62620a8875a3 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
@@ -1,36 +1,36 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn(ptr addrspace(1) inreg %ptr, <2 x half> %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn
-  ; GFX90A_GFX940: bb.1 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn
+  ; GFX90A_GFX942: bb.1 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret <2 x half> %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
index be0c9e2a602f..4ee658666a1b 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -O0 -stop-after=irtranslator -o - %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -O0 -stop-after=irtranslator -o - %s | FileCheck %s
 
 define float @test_atomicrmw_fadd(ptr addrspace(3) %addr) {
   ; CHECK-LABEL: name: test_atomicrmw_fadd
diff --git llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir
rename to llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir
index 2944bb4ebbd1..e11586e464fb 100644
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=FAST
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GREEDY
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=FAST
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GREEDY
 
 ---
 name: mfma_i32_16x16x32_i8_vva
diff --git llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
index e9a8248ef4e9..94fde7c4733a 100644
--- llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
+++ llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX90A %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX940 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX942 %s
 
 --- |
     define amdgpu_kernel void @a_to_v() #0 { ret void }
@@ -64,11 +64,11 @@ body:             |
     ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0
     ;
-    ; GFX940-LABEL: name: a_to_v
-    ; GFX940: liveins: $agpr0
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0
+    ; GFX942-LABEL: name: a_to_v
+    ; GFX942: liveins: $agpr0
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0
     $vgpr0 = COPY killed $agpr0, implicit $exec
     S_ENDPGM 0, implicit $vgpr0
 ...
@@ -94,12 +94,12 @@ body:             |
     ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit killed $agpr0_agpr1, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1
     ;
-    ; GFX940-LABEL: name: a2_to_v2
-    ; GFX940: liveins: $agpr0_agpr1
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr0_agpr1
-    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit killed $agpr0_agpr1, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1
+    ; GFX942-LABEL: name: a2_to_v2
+    ; GFX942: liveins: $agpr0_agpr1
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr0_agpr1
+    ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit killed $agpr0_agpr1, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1
     $vgpr0_vgpr1 = COPY killed $agpr0_agpr1, implicit $exec
     S_ENDPGM 0, implicit $vgpr0_vgpr1
 ...
@@ -127,13 +127,13 @@ body:             |
     ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2
     ;
-    ; GFX940-LABEL: name: a3_to_v3
-    ; GFX940: liveins: $agpr0_agpr1_agpr2
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $agpr0_agpr1_agpr2
-    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; GFX940-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2
+    ; GFX942-LABEL: name: a3_to_v3
+    ; GFX942: liveins: $agpr0_agpr1_agpr2
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $agpr0_agpr1_agpr2
+    ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2
+    ; GFX942-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2
     $vgpr0_vgpr1_vgpr2 = COPY killed $agpr0_agpr1_agpr2, implicit $exec
     S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2
 ...
@@ -162,14 +162,14 @@ body:             |
     ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ;
-    ; GFX940-LABEL: name: a4_to_v4
-    ; GFX940: liveins: $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX942-LABEL: name: a4_to_v4
+    ; GFX942: liveins: $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
     S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3
 ...
@@ -207,18 +207,18 @@ body:             |
     ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ;
-    ; GFX940-LABEL: name: a8_to_v8
-    ; GFX940: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-LABEL: name: a8_to_v8
+    ; GFX942: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec
     S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
 ...
@@ -271,26 +271,26 @@ body:             |
     ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ;
-    ; GFX940-LABEL: name: a16_to_v16
-    ; GFX940: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-LABEL: name: a16_to_v16
+    ; GFX942: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec
     S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
 ...
@@ -313,11 +313,11 @@ body:             |
     ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0
     ;
-    ; GFX940-LABEL: name: v_to_a
-    ; GFX940: liveins: $vgpr0
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0
+    ; GFX942-LABEL: name: v_to_a
+    ; GFX942: liveins: $vgpr0
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0
     $agpr0 = COPY killed $vgpr0, implicit $exec
     S_ENDPGM 0, implicit $agpr0
 ...
@@ -342,12 +342,12 @@ body:             |
     ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1
     ;
-    ; GFX940-LABEL: name: v2_to_a2
-    ; GFX940: liveins: $vgpr0_vgpr1
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $vgpr0_vgpr1
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1
+    ; GFX942-LABEL: name: v2_to_a2
+    ; GFX942: liveins: $vgpr0_vgpr1
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $vgpr0_vgpr1
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1
     $agpr0_agpr1 = COPY killed $vgpr0_vgpr1, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1
 ...
@@ -374,13 +374,13 @@ body:             |
     ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
     ;
-    ; GFX940-LABEL: name: v3_to_a3
-    ; GFX940: liveins: $vgpr0_vgpr1_vgpr2
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $vgpr0_vgpr1_vgpr2
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
+    ; GFX942-LABEL: name: v3_to_a3
+    ; GFX942: liveins: $vgpr0_vgpr1_vgpr2
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $vgpr0_vgpr1_vgpr2
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
     $agpr0_agpr1_agpr2 = COPY killed $vgpr0_vgpr1_vgpr2, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
 ...
@@ -409,14 +409,14 @@ body:             |
     ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
     ;
-    ; GFX940-LABEL: name: v4_to_a4
-    ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-LABEL: name: v4_to_a4
+    ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
     $agpr0_agpr1_agpr2_agpr3 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
 ...
@@ -453,18 +453,18 @@ body:             |
     ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
     ;
-    ; GFX940-LABEL: name: v8_to_a8
-    ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-LABEL: name: v8_to_a8
+    ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
     $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
 ...
@@ -517,26 +517,26 @@ body:             |
     ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     ;
-    ; GFX940-LABEL: name: v16_to_a16
-    ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX940-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-LABEL: name: v16_to_a16
+    ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX942-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
 ...
@@ -560,11 +560,11 @@ body:             |
     ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0
     ;
-    ; GFX940-LABEL: name: s_to_a
-    ; GFX940: liveins: $sgpr0
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0
+    ; GFX942-LABEL: name: s_to_a
+    ; GFX942: liveins: $sgpr0
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0
     $agpr0 = COPY killed $sgpr0, implicit $exec
     S_ENDPGM 0, implicit $agpr0
 ...
@@ -591,12 +591,12 @@ body:             |
     ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1
     ;
-    ; GFX940-LABEL: name: s2_to_a2
-    ; GFX940: liveins: $sgpr0_sgpr1
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $sgpr0_sgpr1
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1
+    ; GFX942-LABEL: name: s2_to_a2
+    ; GFX942: liveins: $sgpr0_sgpr1
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $sgpr0_sgpr1
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1
     $agpr0_agpr1 = COPY killed $sgpr0_sgpr1, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1
 ...
@@ -626,13 +626,13 @@ body:             |
     ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
     ;
-    ; GFX940-LABEL: name: s3_to_a3
-    ; GFX940: liveins: $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
+    ; GFX942-LABEL: name: s3_to_a3
+    ; GFX942: liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
     $agpr0_agpr1_agpr2 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
 ...
@@ -665,14 +665,14 @@ body:             |
     ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
     ;
-    ; GFX940-LABEL: name: s4_to_a4
-    ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-LABEL: name: s4_to_a4
+    ; GFX942: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
     $agpr0_agpr1_agpr2_agpr3 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
 ...
@@ -711,16 +711,16 @@ body:             |
     ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
     ;
-    ; GFX940-LABEL: name: s6_to_a6
-    ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
+    ; GFX942-LABEL: name: s6_to_a6
+    ; GFX942: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
     $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
 ...
@@ -765,18 +765,18 @@ body:             |
     ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
     ;
-    ; GFX940-LABEL: name: s8_to_a8
-    ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-LABEL: name: s8_to_a8
+    ; GFX942: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
     $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
 ...
@@ -845,26 +845,26 @@ body:             |
     ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     ;
-    ; GFX940-LABEL: name: s16_to_a16
-    ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX940-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-LABEL: name: s16_to_a16
+    ; GFX942: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX942-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
 ...
@@ -885,10 +885,10 @@ body:             |
     ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0
     ;
-    ; GFX940-LABEL: name: a_to_a
-    ; GFX940: $agpr1 = IMPLICIT_DEF
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0
+    ; GFX942-LABEL: name: a_to_a
+    ; GFX942: $agpr1 = IMPLICIT_DEF
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0
     $agpr1 = IMPLICIT_DEF
     $agpr0 = COPY killed $agpr1, implicit $exec
     S_ENDPGM 0, implicit $agpr0
@@ -918,13 +918,13 @@ body:             |
     ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
     ;
-    ; GFX940-LABEL: name: a2_to_a2
-    ; GFX940: liveins: $agpr0_agpr1
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr1_agpr2, implicit $agpr0_agpr1
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1, implicit $exec
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
+    ; GFX942-LABEL: name: a2_to_a2
+    ; GFX942: liveins: $agpr0_agpr1
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr1_agpr2, implicit $agpr0_agpr1
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1, implicit $exec
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
     $agpr1_agpr2 = COPY $agpr0_agpr1, implicit $exec
     $agpr3 = COPY $agpr2
     S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
@@ -954,13 +954,13 @@ body:             |
     ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
     ;
-    ; GFX940-LABEL: name: a2_to_a2_kill
-    ; GFX940: liveins: $agpr0_agpr1
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr1_agpr2, implicit $agpr0_agpr1
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1, implicit $exec
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
+    ; GFX942-LABEL: name: a2_to_a2_kill
+    ; GFX942: liveins: $agpr0_agpr1
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr1_agpr2, implicit $agpr0_agpr1
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1, implicit $exec
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
     $agpr1_agpr2 = COPY killed $agpr0_agpr1, implicit $exec
     $agpr3 = COPY $agpr2
     S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
@@ -994,15 +994,15 @@ body:             |
     ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2
     ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec
     ;
-    ; GFX940-LABEL: name: a2_to_a2_implicit_defs
-    ; GFX940: liveins: $agpr0_agpr1
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2
-    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec
+    ; GFX942-LABEL: name: a2_to_a2_implicit_defs
+    ; GFX942: liveins: $agpr0_agpr1
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec
     $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
     $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2
     $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1
@@ -1035,13 +1035,13 @@ body:             |
     ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
     ;
-    ; GFX940-LABEL: name: a3_to_a3_nonoverlap_kill
-    ; GFX940: liveins: $agpr4_agpr5_agpr6
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr4_agpr5_agpr6
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr4_agpr5_agpr6
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
+    ; GFX942-LABEL: name: a3_to_a3_nonoverlap_kill
+    ; GFX942: liveins: $agpr4_agpr5_agpr6
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr4_agpr5_agpr6
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr4_agpr5_agpr6
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
     $agpr0_agpr1_agpr2 = COPY killed $agpr4_agpr5_agpr6
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
 ...
@@ -1073,14 +1073,14 @@ body:             |
     ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1
     ;
-    ; GFX940-LABEL: name: a3_to_a3_overlap_kill
-    ; GFX940: liveins: $agpr1_agpr2_agpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1
+    ; GFX942-LABEL: name: a3_to_a3_overlap_kill
+    ; GFX942: liveins: $agpr1_agpr2_agpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1
     $agpr0_agpr1_agpr2 = COPY killed $agpr1_agpr2_agpr3
     $vgpr1 = COPY $agpr1
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1
@@ -1111,13 +1111,13 @@ body:             |
     ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5
     ;
-    ; GFX940-LABEL: name: a4_to_a4
-    ; GFX940: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5
+    ; GFX942-LABEL: name: a4_to_a4
+    ; GFX942: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5
     $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     $agpr2_agpr3_agpr4_agpr5 = COPY killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
     S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5
@@ -1151,14 +1151,14 @@ body:             |
     ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5
     ;
-    ; GFX940-LABEL: name: a4_to_a4_overlap
-    ; GFX940: liveins: $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5
+    ; GFX942-LABEL: name: a4_to_a4_overlap
+    ; GFX942: liveins: $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5
     $agpr2_agpr3_agpr4_agpr5 = COPY $agpr0_agpr1_agpr2_agpr3, implicit $exec
     S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5
 ...
@@ -1200,17 +1200,17 @@ body:             |
     ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     ;
-    ; GFX940-LABEL: name: a8_to_a8
-    ; GFX940: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; GFX940-NEXT: $agpr15 = V_ACCVGPR_MOV_B32 $agpr7, implicit $exec, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $agpr14 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $agpr13 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $agpr12 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $agpr11 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $agpr10 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $agpr9 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX940-NEXT: $agpr8 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-LABEL: name: a8_to_a8
+    ; GFX942: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
+    ; GFX942-NEXT: $agpr15 = V_ACCVGPR_MOV_B32 $agpr7, implicit $exec, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $agpr14 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $agpr13 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $agpr12 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $agpr11 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $agpr10 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $agpr9 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+    ; GFX942-NEXT: $agpr8 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec
     S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
@@ -1278,25 +1278,25 @@ body:             |
     ; GFX90A-NEXT: $agpr16 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
     ;
-    ; GFX940-LABEL: name: a16_to_a16
-    ; GFX940: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; GFX940-NEXT: $agpr31 = V_ACCVGPR_MOV_B32 $agpr15, implicit $exec, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr30 = V_ACCVGPR_MOV_B32 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr29 = V_ACCVGPR_MOV_B32 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr28 = V_ACCVGPR_MOV_B32 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr27 = V_ACCVGPR_MOV_B32 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr26 = V_ACCVGPR_MOV_B32 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr25 = V_ACCVGPR_MOV_B32 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr24 = V_ACCVGPR_MOV_B32 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr23 = V_ACCVGPR_MOV_B32 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr22 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr21 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr20 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr19 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr18 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr17 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX940-NEXT: $agpr16 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+    ; GFX942-LABEL: name: a16_to_a16
+    ; GFX942: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
+    ; GFX942-NEXT: $agpr31 = V_ACCVGPR_MOV_B32 $agpr15, implicit $exec, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr30 = V_ACCVGPR_MOV_B32 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr29 = V_ACCVGPR_MOV_B32 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr28 = V_ACCVGPR_MOV_B32 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr27 = V_ACCVGPR_MOV_B32 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr26 = V_ACCVGPR_MOV_B32 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr25 = V_ACCVGPR_MOV_B32 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr24 = V_ACCVGPR_MOV_B32 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr23 = V_ACCVGPR_MOV_B32 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr22 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr21 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr20 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr19 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr18 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr17 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; GFX942-NEXT: $agpr16 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
     $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec
     S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
@@ -1326,12 +1326,12 @@ body:             |
     ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0
     ;
-    ; GFX940-LABEL: name: a_to_a_spill
-    ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $agpr1 = IMPLICIT_DEF
-    ; GFX940-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0
+    ; GFX942-LABEL: name: a_to_a_spill
+    ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $agpr1 = IMPLICIT_DEF
+    ; GFX942-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0
     $agpr1 = IMPLICIT_DEF
     $agpr0 = COPY killed $agpr1, implicit $exec
     S_ENDPGM 0, implicit $agpr0
@@ -1368,15 +1368,15 @@ body:             |
     ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ;
-    ; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple
-    ; GFX940: liveins: $agpr0, $sgpr2_sgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1
-    ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-LABEL: name: copy_sgpr_to_agpr_tuple
+    ; GFX942: liveins: $agpr0, $sgpr2_sgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1
+    ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     S_NOP 0, implicit-def dead $sgpr0_sgpr1
     renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -1412,15 +1412,15 @@ body:             |
     ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
     ;
-    ; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple_kill
-    ; GFX940: liveins: $agpr0, $sgpr2_sgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1
-    ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
+    ; GFX942-LABEL: name: copy_sgpr_to_agpr_tuple_kill
+    ; GFX942: liveins: $agpr0, $sgpr2_sgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1
+    ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
     S_NOP 0, implicit-def dead $sgpr0_sgpr1
     renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
@@ -1457,15 +1457,15 @@ body:             |
     ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
     ;
-    ; GFX940-LABEL: name: copy_agpr_to_agpr_tuple
-    ; GFX940: liveins: $agpr0, $agpr2_agpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1
-    ; GFX940-NEXT: $agpr7 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-LABEL: name: copy_agpr_to_agpr_tuple
+    ; GFX942: liveins: $agpr0, $agpr2_agpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1
+    ; GFX942-NEXT: $agpr7 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
     S_NOP 0, implicit-def dead $agpr0_agpr1
     renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
     S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
@@ -1502,15 +1502,15 @@ body:             |
     ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
     ;
-    ; GFX940-LABEL: name: copy_agpr_to_agpr_tuple_kill
-    ; GFX940: liveins: $agpr0, $agpr2_agpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1
-    ; GFX940-NEXT: $agpr7 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
-    ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
+    ; GFX942-LABEL: name: copy_agpr_to_agpr_tuple_kill
+    ; GFX942: liveins: $agpr0, $agpr2_agpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1
+    ; GFX942-NEXT: $agpr7 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+    ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
+    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
     S_NOP 0, implicit-def dead $agpr0_agpr1
     renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable killed $agpr0_agpr1_agpr2_agpr3, implicit $exec
     S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
diff --git llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll
index 835e5e5f06ef..0114de738ce8 100644
--- llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll
+++ llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefix=ASM %s
 
 ; OBJDUMP: Contents of section .rodata:
 ; OBJDUMP-NEXT: 0000 00000000 00000000 10010000 00000000  ................
diff --git llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
index f5c9b1a79b47..5f56568ef88e 100644
--- llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
+++ llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90A %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1100 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1200 %s
 
@@ -80,14 +80,14 @@ define float @syncscope_system(ptr %addr, float %val) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: syncscope_system:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: syncscope_system:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-LABEL: syncscope_system:
 ; GFX1100:       ; %bb.0:
@@ -187,12 +187,12 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: syncscope_workgroup_rtn:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: syncscope_workgroup_rtn:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-LABEL: syncscope_workgroup_rtn:
 ; GFX1100:       ; %bb.0:
@@ -320,12 +320,12 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: syncscope_workgroup_nortn:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: syncscope_workgroup_nortn:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-LABEL: syncscope_workgroup_nortn:
 ; GFX1100:       ; %bb.0:
@@ -396,12 +396,12 @@ define float @no_unsafe(ptr %addr, float %val) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: no_unsafe:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: no_unsafe:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-LABEL: no_unsafe:
 ; GFX1100:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
index 632f7dbc5337..80fb20f93b3e 100644
--- llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
+++ llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX940-BACKOFF %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX942-BACKOFF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-BACKOFF %s
@@ -30,15 +30,15 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
 ; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-BACKOFF-LABEL: back_off_barrier_no_fence:
-; GFX940-BACKOFF:       ; %bb.0:
-; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-BACKOFF-NEXT:    s_barrier
-; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
-; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-BACKOFF-LABEL: back_off_barrier_no_fence:
+; GFX942-BACKOFF:       ; %bb.0:
+; GFX942-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-BACKOFF-NEXT:    s_barrier
+; GFX942-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
+; GFX942-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence:
 ; GFX10-BACKOFF:       ; %bb.0:
@@ -88,16 +88,16 @@ define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 {
 ; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-BACKOFF-LABEL: back_off_barrier_with_fence:
-; GFX940-BACKOFF:       ; %bb.0:
-; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-BACKOFF-NEXT:    s_barrier
-; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
-; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-BACKOFF-LABEL: back_off_barrier_with_fence:
+; GFX942-BACKOFF:       ; %bb.0:
+; GFX942-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-BACKOFF-NEXT:    s_barrier
+; GFX942-BACKOFF-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
+; GFX942-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence:
 ; GFX10-BACKOFF:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/bf16-conversions.ll llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
index 59e47cbc12b2..74164016f9a9 100644
--- llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
+++ llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN,GFX-940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX-942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GCN,GFX-950 %s
 
 ; TODO: Add global-isel when it can support bf16
@@ -24,24 +24,24 @@ define amdgpu_ps float @v_test_cvt_bf16_f32_s(bfloat inreg %v) {
 }
 
 define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_v(<2 x float> %src) {
-; GFX-940-LABEL: v_test_cvt_v2f32_v2bf16_v:
-; GFX-940:       ; %bb.0:
-; GFX-940-NEXT:    v_bfe_u32 v2, v0, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v2, v2, v0, s0
-; GFX-940-NEXT:    v_or_b32_e32 v3, 0x400000, v0
-; GFX-940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX-940-NEXT:    v_bfe_u32 v2, v1, 16, 1
-; GFX-940-NEXT:    v_add3_u32 v2, v2, v1, s0
-; GFX-940-NEXT:    v_or_b32_e32 v3, 0x400000, v1
-; GFX-940-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX-940-NEXT:    s_mov_b32 s0, 0x7060302
-; GFX-940-NEXT:    s_nop 0
-; GFX-940-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
-; GFX-940-NEXT:    v_perm_b32 v0, v1, v0, s0
-; GFX-940-NEXT:    ; return to shader part epilog
+; GFX-942-LABEL: v_test_cvt_v2f32_v2bf16_v:
+; GFX-942:       ; %bb.0:
+; GFX-942-NEXT:    v_bfe_u32 v2, v0, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v2, v2, v0, s0
+; GFX-942-NEXT:    v_or_b32_e32 v3, 0x400000, v0
+; GFX-942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX-942-NEXT:    v_bfe_u32 v2, v1, 16, 1
+; GFX-942-NEXT:    v_add3_u32 v2, v2, v1, s0
+; GFX-942-NEXT:    v_or_b32_e32 v3, 0x400000, v1
+; GFX-942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX-942-NEXT:    s_mov_b32 s0, 0x7060302
+; GFX-942-NEXT:    s_nop 0
+; GFX-942-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GFX-942-NEXT:    v_perm_b32 v0, v1, v0, s0
+; GFX-942-NEXT:    ; return to shader part epilog
 ;
 ; GFX-950-LABEL: v_test_cvt_v2f32_v2bf16_v:
 ; GFX-950:       ; %bb.0:
@@ -53,27 +53,27 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_v(<2 x float> %src) {
 }
 
 define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_s(<2 x float> inreg %src) {
-; GFX-940-LABEL: v_test_cvt_v2f32_v2bf16_s:
-; GFX-940:       ; %bb.0:
-; GFX-940-NEXT:    s_bfe_u32 s2, s1, 0x10010
-; GFX-940-NEXT:    s_add_i32 s2, s2, s1
-; GFX-940-NEXT:    s_or_b32 s4, s1, 0x400000
-; GFX-940-NEXT:    s_add_i32 s5, s2, 0x7fff
-; GFX-940-NEXT:    v_cmp_u_f32_e64 s[2:3], s1, s1
-; GFX-940-NEXT:    s_and_b64 s[2:3], s[2:3], exec
-; GFX-940-NEXT:    s_cselect_b32 s1, s4, s5
-; GFX-940-NEXT:    s_lshr_b32 s2, s1, 16
-; GFX-940-NEXT:    s_bfe_u32 s1, s0, 0x10010
-; GFX-940-NEXT:    s_add_i32 s1, s1, s0
-; GFX-940-NEXT:    s_or_b32 s3, s0, 0x400000
-; GFX-940-NEXT:    s_add_i32 s4, s1, 0x7fff
-; GFX-940-NEXT:    v_cmp_u_f32_e64 s[0:1], s0, s0
-; GFX-940-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GFX-940-NEXT:    s_cselect_b32 s0, s3, s4
-; GFX-940-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX-940-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
-; GFX-940-NEXT:    v_mov_b32_e32 v0, s0
-; GFX-940-NEXT:    ; return to shader part epilog
+; GFX-942-LABEL: v_test_cvt_v2f32_v2bf16_s:
+; GFX-942:       ; %bb.0:
+; GFX-942-NEXT:    s_bfe_u32 s2, s1, 0x10010
+; GFX-942-NEXT:    s_add_i32 s2, s2, s1
+; GFX-942-NEXT:    s_or_b32 s4, s1, 0x400000
+; GFX-942-NEXT:    s_add_i32 s5, s2, 0x7fff
+; GFX-942-NEXT:    v_cmp_u_f32_e64 s[2:3], s1, s1
+; GFX-942-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; GFX-942-NEXT:    s_cselect_b32 s1, s4, s5
+; GFX-942-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX-942-NEXT:    s_bfe_u32 s1, s0, 0x10010
+; GFX-942-NEXT:    s_add_i32 s1, s1, s0
+; GFX-942-NEXT:    s_or_b32 s3, s0, 0x400000
+; GFX-942-NEXT:    s_add_i32 s4, s1, 0x7fff
+; GFX-942-NEXT:    v_cmp_u_f32_e64 s[0:1], s0, s0
+; GFX-942-NEXT:    s_and_b64 s[0:1], s[0:1], exec
+; GFX-942-NEXT:    s_cselect_b32 s0, s3, s4
+; GFX-942-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX-942-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
+; GFX-942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX-942-NEXT:    ; return to shader part epilog
 ;
 ; GFX-950-LABEL: v_test_cvt_v2f32_v2bf16_s:
 ; GFX-950:       ; %bb.0:
@@ -86,17 +86,17 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_s(<2 x float> inreg %src) {
 }
 
 define amdgpu_ps float @v_test_cvt_f32_bf16_v(float %src) {
-; GFX-940-LABEL: v_test_cvt_f32_bf16_v:
-; GFX-940:       ; %bb.0:
-; GFX-940-NEXT:    v_bfe_u32 v1, v0, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v1, v1, v0, s0
-; GFX-940-NEXT:    v_or_b32_e32 v2, 0x400000, v0
-; GFX-940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX-940-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX-940-NEXT:    ; return to shader part epilog
+; GFX-942-LABEL: v_test_cvt_f32_bf16_v:
+; GFX-942:       ; %bb.0:
+; GFX-942-NEXT:    v_bfe_u32 v1, v0, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v1, v1, v0, s0
+; GFX-942-NEXT:    v_or_b32_e32 v2, 0x400000, v0
+; GFX-942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX-942-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX-942-NEXT:    ; return to shader part epilog
 ;
 ; GFX-950-LABEL: v_test_cvt_f32_bf16_v:
 ; GFX-950:       ; %bb.0:
@@ -109,47 +109,47 @@ define amdgpu_ps float @v_test_cvt_f32_bf16_v(float %src) {
 }
 
 define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) {
-; GFX-940-LABEL: v_test_cvt_v2f64_v2bf16_v:
-; GFX-940:       ; %bb.0:
-; GFX-940-NEXT:    v_cvt_f32_f64_e64 v6, |v[0:1]|
-; GFX-940-NEXT:    v_cvt_f64_f32_e32 v[4:5], v6
-; GFX-940-NEXT:    v_and_b32_e32 v7, 1, v6
-; GFX-940-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GFX-940-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GFX-940-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
-; GFX-940-NEXT:    v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GFX-940-NEXT:    v_add_u32_e32 v4, v6, v4
-; GFX-940-NEXT:    s_or_b64 vcc, s[0:1], vcc
-; GFX-940-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX-940-NEXT:    s_brev_b32 s4, 1
-; GFX-940-NEXT:    v_and_or_b32 v5, v1, s4, v4
-; GFX-940-NEXT:    v_bfe_u32 v4, v4, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s5, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v4, v4, v5, s5
-; GFX-940-NEXT:    v_or_b32_e32 v5, 0x400000, v5
-; GFX-940-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
-; GFX-940-NEXT:    v_cvt_f32_f64_e64 v5, |v[2:3]|
-; GFX-940-NEXT:    v_cvt_f64_f32_e32 v[0:1], v5
-; GFX-940-NEXT:    v_and_b32_e32 v6, 1, v5
-; GFX-940-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[2:3]|, v[0:1]
-; GFX-940-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[2:3]|, v[0:1]
-; GFX-940-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v6
-; GFX-940-NEXT:    v_cndmask_b32_e64 v0, -1, 1, s[2:3]
-; GFX-940-NEXT:    v_add_u32_e32 v0, v5, v0
-; GFX-940-NEXT:    s_or_b64 vcc, s[0:1], vcc
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GFX-940-NEXT:    v_and_or_b32 v1, v3, s4, v0
-; GFX-940-NEXT:    v_bfe_u32 v0, v0, 16, 1
-; GFX-940-NEXT:    v_add3_u32 v0, v0, v1, s5
-; GFX-940-NEXT:    v_or_b32_e32 v1, 0x400000, v1
-; GFX-940-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
-; GFX-940-NEXT:    s_mov_b32 s0, 0x7060302
-; GFX-940-NEXT:    s_nop 0
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX-940-NEXT:    v_perm_b32 v0, v0, v4, s0
-; GFX-940-NEXT:    ; return to shader part epilog
+; GFX-942-LABEL: v_test_cvt_v2f64_v2bf16_v:
+; GFX-942:       ; %bb.0:
+; GFX-942-NEXT:    v_cvt_f32_f64_e64 v6, |v[0:1]|
+; GFX-942-NEXT:    v_cvt_f64_f32_e32 v[4:5], v6
+; GFX-942-NEXT:    v_and_b32_e32 v7, 1, v6
+; GFX-942-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-942-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-942-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
+; GFX-942-NEXT:    v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-942-NEXT:    v_add_u32_e32 v4, v6, v4
+; GFX-942-NEXT:    s_or_b64 vcc, s[0:1], vcc
+; GFX-942-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX-942-NEXT:    s_brev_b32 s4, 1
+; GFX-942-NEXT:    v_and_or_b32 v5, v1, s4, v4
+; GFX-942-NEXT:    v_bfe_u32 v4, v4, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s5, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v4, v4, v5, s5
+; GFX-942-NEXT:    v_or_b32_e32 v5, 0x400000, v5
+; GFX-942-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX-942-NEXT:    v_cvt_f32_f64_e64 v5, |v[2:3]|
+; GFX-942-NEXT:    v_cvt_f64_f32_e32 v[0:1], v5
+; GFX-942-NEXT:    v_and_b32_e32 v6, 1, v5
+; GFX-942-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[2:3]|, v[0:1]
+; GFX-942-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[2:3]|, v[0:1]
+; GFX-942-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v6
+; GFX-942-NEXT:    v_cndmask_b32_e64 v0, -1, 1, s[2:3]
+; GFX-942-NEXT:    v_add_u32_e32 v0, v5, v0
+; GFX-942-NEXT:    s_or_b64 vcc, s[0:1], vcc
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX-942-NEXT:    v_and_or_b32 v1, v3, s4, v0
+; GFX-942-NEXT:    v_bfe_u32 v0, v0, 16, 1
+; GFX-942-NEXT:    v_add3_u32 v0, v0, v1, s5
+; GFX-942-NEXT:    v_or_b32_e32 v1, 0x400000, v1
+; GFX-942-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
+; GFX-942-NEXT:    s_mov_b32 s0, 0x7060302
+; GFX-942-NEXT:    s_nop 0
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX-942-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX-942-NEXT:    ; return to shader part epilog
 ;
 ; GFX-950-LABEL: v_test_cvt_v2f64_v2bf16_v:
 ; GFX-950:       ; %bb.0:
@@ -163,24 +163,24 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) {
 }
 
 define amdgpu_ps float @fptrunc_f32_f32_to_v2bf16(float %a, float %b) {
-; GFX-940-LABEL: fptrunc_f32_f32_to_v2bf16:
-; GFX-940:       ; %bb.0: ; %entry
-; GFX-940-NEXT:    v_bfe_u32 v2, v0, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v2, v2, v0, s0
-; GFX-940-NEXT:    v_or_b32_e32 v3, 0x400000, v0
-; GFX-940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX-940-NEXT:    v_bfe_u32 v2, v1, 16, 1
-; GFX-940-NEXT:    v_add3_u32 v2, v2, v1, s0
-; GFX-940-NEXT:    v_or_b32_e32 v3, 0x400000, v1
-; GFX-940-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX-940-NEXT:    s_mov_b32 s0, 0x7060302
-; GFX-940-NEXT:    s_nop 0
-; GFX-940-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
-; GFX-940-NEXT:    v_perm_b32 v0, v1, v0, s0
-; GFX-940-NEXT:    ; return to shader part epilog
+; GFX-942-LABEL: fptrunc_f32_f32_to_v2bf16:
+; GFX-942:       ; %bb.0: ; %entry
+; GFX-942-NEXT:    v_bfe_u32 v2, v0, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v2, v2, v0, s0
+; GFX-942-NEXT:    v_or_b32_e32 v3, 0x400000, v0
+; GFX-942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX-942-NEXT:    v_bfe_u32 v2, v1, 16, 1
+; GFX-942-NEXT:    v_add3_u32 v2, v2, v1, s0
+; GFX-942-NEXT:    v_or_b32_e32 v3, 0x400000, v1
+; GFX-942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX-942-NEXT:    s_mov_b32 s0, 0x7060302
+; GFX-942-NEXT:    s_nop 0
+; GFX-942-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GFX-942-NEXT:    v_perm_b32 v0, v1, v0, s0
+; GFX-942-NEXT:    ; return to shader part epilog
 ;
 ; GFX-950-LABEL: fptrunc_f32_f32_to_v2bf16:
 ; GFX-950:       ; %bb.0: ; %entry
@@ -196,26 +196,26 @@ entry:
 }
 
 define amdgpu_ps float @fptrunc_f32_f32_to_v2bf16_mods(float %a, float %b) {
-; GFX-940-LABEL: fptrunc_f32_f32_to_v2bf16_mods:
-; GFX-940:       ; %bb.0: ; %entry
-; GFX-940-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
-; GFX-940-NEXT:    v_bfe_u32 v3, v2, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v3, v3, v2, s0
-; GFX-940-NEXT:    v_or_b32_e32 v2, 0x400000, v2
-; GFX-940-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX-940-NEXT:    v_and_b32_e32 v2, 0x7fffffff, v1
-; GFX-940-NEXT:    v_bfe_u32 v3, v2, 16, 1
-; GFX-940-NEXT:    v_add3_u32 v3, v3, v2, s0
-; GFX-940-NEXT:    v_or_b32_e32 v2, 0x400000, v2
-; GFX-940-NEXT:    v_cmp_u_f32_e64 vcc, |v1|, |v1|
-; GFX-940-NEXT:    s_mov_b32 s0, 0x7060302
-; GFX-940-NEXT:    s_nop 0
-; GFX-940-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
-; GFX-940-NEXT:    v_perm_b32 v0, v1, v0, s0
-; GFX-940-NEXT:    ; return to shader part epilog
+; GFX-942-LABEL: fptrunc_f32_f32_to_v2bf16_mods:
+; GFX-942:       ; %bb.0: ; %entry
+; GFX-942-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; GFX-942-NEXT:    v_bfe_u32 v3, v2, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v3, v3, v2, s0
+; GFX-942-NEXT:    v_or_b32_e32 v2, 0x400000, v2
+; GFX-942-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX-942-NEXT:    v_and_b32_e32 v2, 0x7fffffff, v1
+; GFX-942-NEXT:    v_bfe_u32 v3, v2, 16, 1
+; GFX-942-NEXT:    v_add3_u32 v3, v3, v2, s0
+; GFX-942-NEXT:    v_or_b32_e32 v2, 0x400000, v2
+; GFX-942-NEXT:    v_cmp_u_f32_e64 vcc, |v1|, |v1|
+; GFX-942-NEXT:    s_mov_b32 s0, 0x7060302
+; GFX-942-NEXT:    s_nop 0
+; GFX-942-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
+; GFX-942-NEXT:    v_perm_b32 v0, v1, v0, s0
+; GFX-942-NEXT:    ; return to shader part epilog
 ;
 ; GFX-950-LABEL: fptrunc_f32_f32_to_v2bf16_mods:
 ; GFX-950:       ; %bb.0: ; %entry
@@ -233,19 +233,19 @@ entry:
 }
 
 define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
-; GFX-940-LABEL: fptrunc_f32_to_bf16:
-; GFX-940:       ; %bb.0: ; %entry
-; GFX-940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX-940-NEXT:    v_mov_b32_e32 v2, v1
-; GFX-940-NEXT:    v_bfe_u32 v1, v0, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v1, v1, v0, s0
-; GFX-940-NEXT:    v_or_b32_e32 v4, 0x400000, v0
-; GFX-940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
-; GFX-940-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GFX-940-NEXT:    s_endpgm
+; GFX-942-LABEL: fptrunc_f32_to_bf16:
+; GFX-942:       ; %bb.0: ; %entry
+; GFX-942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX-942-NEXT:    v_mov_b32_e32 v2, v1
+; GFX-942-NEXT:    v_bfe_u32 v1, v0, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v1, v1, v0, s0
+; GFX-942-NEXT:    v_or_b32_e32 v4, 0x400000, v0
+; GFX-942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX-942-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-942-NEXT:    s_endpgm
 ;
 ; GFX-950-LABEL: fptrunc_f32_to_bf16:
 ; GFX-950:       ; %bb.0: ; %entry
@@ -261,20 +261,20 @@ entry:
 }
 
 define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
-; GFX-940-LABEL: fptrunc_f32_to_bf16_abs:
-; GFX-940:       ; %bb.0: ; %entry
-; GFX-940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX-940-NEXT:    v_mov_b32_e32 v2, v1
-; GFX-940-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v0
-; GFX-940-NEXT:    v_bfe_u32 v4, v1, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v4, v4, v1, s0
-; GFX-940-NEXT:    v_or_b32_e32 v1, 0x400000, v1
-; GFX-940-NEXT:    v_cmp_u_f32_e64 vcc, |v0|, |v0|
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX-940-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GFX-940-NEXT:    s_endpgm
+; GFX-942-LABEL: fptrunc_f32_to_bf16_abs:
+; GFX-942:       ; %bb.0: ; %entry
+; GFX-942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX-942-NEXT:    v_mov_b32_e32 v2, v1
+; GFX-942-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v0
+; GFX-942-NEXT:    v_bfe_u32 v4, v1, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v4, v4, v1, s0
+; GFX-942-NEXT:    v_or_b32_e32 v1, 0x400000, v1
+; GFX-942-NEXT:    v_cmp_u_f32_e64 vcc, |v0|, |v0|
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-942-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-942-NEXT:    s_endpgm
 ;
 ; GFX-950-LABEL: fptrunc_f32_to_bf16_abs:
 ; GFX-950:       ; %bb.0: ; %entry
@@ -291,20 +291,20 @@ entry:
 }
 
 define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
-; GFX-940-LABEL: fptrunc_f32_to_bf16_neg:
-; GFX-940:       ; %bb.0: ; %entry
-; GFX-940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX-940-NEXT:    v_mov_b32_e32 v2, v1
-; GFX-940-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
-; GFX-940-NEXT:    v_bfe_u32 v4, v1, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v4, v4, v1, s0
-; GFX-940-NEXT:    v_or_b32_e32 v1, 0x400000, v1
-; GFX-940-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX-940-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GFX-940-NEXT:    s_endpgm
+; GFX-942-LABEL: fptrunc_f32_to_bf16_neg:
+; GFX-942:       ; %bb.0: ; %entry
+; GFX-942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX-942-NEXT:    v_mov_b32_e32 v2, v1
+; GFX-942-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
+; GFX-942-NEXT:    v_bfe_u32 v4, v1, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v4, v4, v1, s0
+; GFX-942-NEXT:    v_or_b32_e32 v1, 0x400000, v1
+; GFX-942-NEXT:    v_cmp_u_f32_e64 vcc, -v0, -v0
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX-942-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-942-NEXT:    s_endpgm
 ;
 ; GFX-950-LABEL: fptrunc_f32_to_bf16_neg:
 ; GFX-950:       ; %bb.0: ; %entry
@@ -321,29 +321,29 @@ entry:
 }
 
 define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
-; GFX-940-LABEL: fptrunc_f64_to_bf16:
-; GFX-940:       ; %bb.0: ; %entry
-; GFX-940-NEXT:    v_cvt_f32_f64_e64 v6, |v[0:1]|
-; GFX-940-NEXT:    v_cvt_f64_f32_e32 v[4:5], v6
-; GFX-940-NEXT:    v_and_b32_e32 v7, 1, v6
-; GFX-940-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GFX-940-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GFX-940-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
-; GFX-940-NEXT:    v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GFX-940-NEXT:    v_add_u32_e32 v4, v6, v4
-; GFX-940-NEXT:    s_or_b64 vcc, s[0:1], vcc
-; GFX-940-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX-940-NEXT:    s_brev_b32 s0, 1
-; GFX-940-NEXT:    v_and_or_b32 v5, v1, s0, v4
-; GFX-940-NEXT:    v_bfe_u32 v4, v4, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v4, v4, v5, s0
-; GFX-940-NEXT:    v_or_b32_e32 v5, 0x400000, v5
-; GFX-940-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GFX-940-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GFX-940-NEXT:    s_endpgm
+; GFX-942-LABEL: fptrunc_f64_to_bf16:
+; GFX-942:       ; %bb.0: ; %entry
+; GFX-942-NEXT:    v_cvt_f32_f64_e64 v6, |v[0:1]|
+; GFX-942-NEXT:    v_cvt_f64_f32_e32 v[4:5], v6
+; GFX-942-NEXT:    v_and_b32_e32 v7, 1, v6
+; GFX-942-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-942-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-942-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
+; GFX-942-NEXT:    v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-942-NEXT:    v_add_u32_e32 v4, v6, v4
+; GFX-942-NEXT:    s_or_b64 vcc, s[0:1], vcc
+; GFX-942-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX-942-NEXT:    s_brev_b32 s0, 1
+; GFX-942-NEXT:    v_and_or_b32 v5, v1, s0, v4
+; GFX-942-NEXT:    v_bfe_u32 v4, v4, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v4, v4, v5, s0
+; GFX-942-NEXT:    v_or_b32_e32 v5, 0x400000, v5
+; GFX-942-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-942-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-942-NEXT:    s_endpgm
 ;
 ; GFX-950-LABEL: fptrunc_f64_to_bf16:
 ; GFX-950:       ; %bb.0: ; %entry
@@ -358,30 +358,30 @@ entry:
 }
 
 define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
-; GFX-940-LABEL: fptrunc_f64_to_bf16_neg:
-; GFX-940:       ; %bb.0: ; %entry
-; GFX-940-NEXT:    v_cvt_f32_f64_e64 v7, |v[0:1]|
-; GFX-940-NEXT:    v_cvt_f64_f32_e32 v[4:5], v7
-; GFX-940-NEXT:    v_and_b32_e32 v8, 1, v7
-; GFX-940-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GFX-940-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GFX-940-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
-; GFX-940-NEXT:    v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GFX-940-NEXT:    v_add_u32_e32 v4, v7, v4
-; GFX-940-NEXT:    s_or_b64 vcc, s[0:1], vcc
-; GFX-940-NEXT:    s_brev_b32 s4, 1
-; GFX-940-NEXT:    v_xor_b32_e32 v6, 0x80000000, v1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX-940-NEXT:    v_and_or_b32 v5, v6, s4, v4
-; GFX-940-NEXT:    v_bfe_u32 v4, v4, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v4, v4, v5, s0
-; GFX-940-NEXT:    v_or_b32_e32 v5, 0x400000, v5
-; GFX-940-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GFX-940-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GFX-940-NEXT:    s_endpgm
+; GFX-942-LABEL: fptrunc_f64_to_bf16_neg:
+; GFX-942:       ; %bb.0: ; %entry
+; GFX-942-NEXT:    v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-942-NEXT:    v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-942-NEXT:    v_and_b32_e32 v8, 1, v7
+; GFX-942-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-942-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-942-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-942-NEXT:    v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-942-NEXT:    v_add_u32_e32 v4, v7, v4
+; GFX-942-NEXT:    s_or_b64 vcc, s[0:1], vcc
+; GFX-942-NEXT:    s_brev_b32 s4, 1
+; GFX-942-NEXT:    v_xor_b32_e32 v6, 0x80000000, v1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-942-NEXT:    v_and_or_b32 v5, v6, s4, v4
+; GFX-942-NEXT:    v_bfe_u32 v4, v4, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v4, v4, v5, s0
+; GFX-942-NEXT:    v_or_b32_e32 v5, 0x400000, v5
+; GFX-942-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-942-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-942-NEXT:    s_endpgm
 ;
 ; GFX-950-LABEL: fptrunc_f64_to_bf16_neg:
 ; GFX-950:       ; %bb.0: ; %entry
@@ -397,30 +397,30 @@ entry:
 }
 
 define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
-; GFX-940-LABEL: fptrunc_f64_to_bf16_abs:
-; GFX-940:       ; %bb.0: ; %entry
-; GFX-940-NEXT:    v_cvt_f32_f64_e64 v7, |v[0:1]|
-; GFX-940-NEXT:    v_cvt_f64_f32_e32 v[4:5], v7
-; GFX-940-NEXT:    v_and_b32_e32 v8, 1, v7
-; GFX-940-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
-; GFX-940-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
-; GFX-940-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
-; GFX-940-NEXT:    v_cndmask_b32_e64 v4, -1, 1, s[2:3]
-; GFX-940-NEXT:    v_add_u32_e32 v4, v7, v4
-; GFX-940-NEXT:    s_or_b64 vcc, s[0:1], vcc
-; GFX-940-NEXT:    v_and_b32_e32 v6, 0x7fffffff, v1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX-940-NEXT:    s_brev_b32 s0, 1
-; GFX-940-NEXT:    v_and_or_b32 v5, v6, s0, v4
-; GFX-940-NEXT:    v_bfe_u32 v4, v4, 16, 1
-; GFX-940-NEXT:    s_movk_i32 s0, 0x7fff
-; GFX-940-NEXT:    v_add3_u32 v4, v4, v5, s0
-; GFX-940-NEXT:    v_or_b32_e32 v5, 0x400000, v5
-; GFX-940-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
-; GFX-940-NEXT:    s_nop 1
-; GFX-940-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GFX-940-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
-; GFX-940-NEXT:    s_endpgm
+; GFX-942-LABEL: fptrunc_f64_to_bf16_abs:
+; GFX-942:       ; %bb.0: ; %entry
+; GFX-942-NEXT:    v_cvt_f32_f64_e64 v7, |v[0:1]|
+; GFX-942-NEXT:    v_cvt_f64_f32_e32 v[4:5], v7
+; GFX-942-NEXT:    v_and_b32_e32 v8, 1, v7
+; GFX-942-NEXT:    v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
+; GFX-942-NEXT:    v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
+; GFX-942-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
+; GFX-942-NEXT:    v_cndmask_b32_e64 v4, -1, 1, s[2:3]
+; GFX-942-NEXT:    v_add_u32_e32 v4, v7, v4
+; GFX-942-NEXT:    s_or_b64 vcc, s[0:1], vcc
+; GFX-942-NEXT:    v_and_b32_e32 v6, 0x7fffffff, v1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX-942-NEXT:    s_brev_b32 s0, 1
+; GFX-942-NEXT:    v_and_or_b32 v5, v6, s0, v4
+; GFX-942-NEXT:    v_bfe_u32 v4, v4, 16, 1
+; GFX-942-NEXT:    s_movk_i32 s0, 0x7fff
+; GFX-942-NEXT:    v_add3_u32 v4, v4, v5, s0
+; GFX-942-NEXT:    v_or_b32_e32 v5, 0x400000, v5
+; GFX-942-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
+; GFX-942-NEXT:    s_nop 1
+; GFX-942-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX-942-NEXT:    flat_store_short_d16_hi v[2:3], v0 sc0 sc1
+; GFX-942-NEXT:    s_endpgm
 ;
 ; GFX-950-LABEL: fptrunc_f64_to_bf16_abs:
 ; GFX-950:       ; %bb.0: ; %entry
diff --git llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
index 85a701b23a6c..a14114358433 100644
--- llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
@@ -23,19 +23,19 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
   ret void
 }
@@ -56,20 +56,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32>
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
@@ -90,20 +90,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32>
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
@@ -126,22 +126,22 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
@@ -167,25 +167,25 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
   ret void
 }
@@ -212,26 +212,26 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
@@ -258,26 +258,26 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
@@ -306,28 +306,28 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr
   ; GFX908_GFX11-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908_GFX11-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
diff --git llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
index 417dee573c5d..eb452dc4b874 100644
--- llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
@@ -1,26 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
   ; GFX11: bb.0 (%ir-block.0):
@@ -41,21 +41,21 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32>
 }
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn
   ; GFX11: bb.0 (%ir-block.0):
@@ -77,21 +77,21 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i
 }
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn
   ; GFX11: bb.0 (%ir-block.0):
@@ -113,23 +113,23 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i
 }
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn
   ; GFX11: bb.0 (%ir-block.0):
@@ -153,26 +153,26 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32>
 }
 
 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
   ; GFX11: bb.0 (%ir-block.0):
@@ -199,27 +199,27 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad
 }
 
 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
   ; GFX11: bb.0 (%ir-block.0):
@@ -247,27 +247,27 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add
 }
 
 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
   ; GFX11: bb.0 (%ir-block.0):
@@ -295,29 +295,29 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add
 }
 
 define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
   ; GFX11: bb.0 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
index ff087715e0fc..6885657bbfa3 100644
--- llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
@@ -1,441 +1,441 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
 
 define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY8]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY9]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY8]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY9]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY9]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY10]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY9]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY10]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY9]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY10]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY9]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY10]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY10]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY11]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY10]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY11]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
 
 define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
 
 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY12]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY13]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY12]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY13]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY13]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY14]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY13]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY14]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY13]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY14]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY13]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY14]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret double %ret
 }
 
 define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY14]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY15]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY14]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY15]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
index 44fddc372293..790cd8ef9ecc 100644
--- llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
@@ -21,19 +21,19 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
   ret void
 }
@@ -54,20 +54,20 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
@@ -88,20 +88,20 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
@@ -124,22 +124,22 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, <
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
@@ -165,25 +165,25 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0)
   ret void
 }
@@ -210,26 +210,26 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
@@ -256,26 +256,26 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret void
 }
@@ -304,28 +304,28 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va
   ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
diff --git llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
index c4ef1390a288..89e1a4be4e16 100644
--- llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
@@ -1,191 +1,191 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret <2 x half> %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
index 0ea73ad4c501..ba2694fca99f 100644
--- llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -28,15 +28,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -216,15 +216,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -403,33 +403,33 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgp
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_add_f32 v5, v4, s[4:7], 0 offen offset:1024 sc0
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, v5
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_add_f32 v5, v4, s[4:7], 0 offen offset:1024 sc0
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, v5
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -799,15 +799,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1004,15 +1004,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1203,15 +1203,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset(ptr addrspace(7)
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset:
 ; GFX11:       ; %bb.0:
@@ -1427,15 +1427,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1651,15 +1651,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -1900,15 +1900,15 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2149,15 +2149,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2428,36 +2428,36 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdg
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v6
-; GFX940-NEXT:    v_mov_b32_e32 v6, v5
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB10_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_add_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB10_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v6
+; GFX942-NEXT:    v_mov_b32_e32 v6, v5
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB10_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_add_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB10_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2920,15 +2920,15 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remot
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -3189,15 +3189,15 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -3456,39 +3456,39 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, s4, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v1, s4
-; GFX940-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, s6, v3
-; GFX940-NEXT:    v_add_f16_e32 v2, v2, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, s6, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s7, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[2:3]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v1, s4
+; GFX942-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, s6, v3
+; GFX942-NEXT:    v_add_f16_e32 v2, v2, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, s6, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s7, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[2:3]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3804,38 +3804,38 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v1, s4
-; GFX940-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:  .LBB14_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, s6, v3
-; GFX940-NEXT:    v_add_f16_e32 v2, v2, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, s6, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s7, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[2:3]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB14_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v1, s4
+; GFX942-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:  .LBB14_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, s6, v3
+; GFX942-NEXT:    v_add_f16_e32 v2, v2, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, s6, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s7, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[2:3]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB14_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4179,72 +4179,72 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v4, v8
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v4, 0x200, v4
-; GFX940-NEXT:    v_and_b32_e32 v10, -4, v4
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v6, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v11, v6
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB15_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v10, s[4:7], 0 offen
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:  .LBB15_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB15_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, v4, v7
-; GFX940-NEXT:    v_add_f16_e32 v6, v6, v5
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, v4, v6
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v11, v6
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_mov_b64_e32 v[8:9], v[6:7]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB15_4: ; Parent Loop BB15_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[8:9], v10, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v8
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v4, v8
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v4, 0x200, v4
+; GFX942-NEXT:    v_and_b32_e32 v10, -4, v4
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v6, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v11, v6
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB15_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v10, s[4:7], 0 offen
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:  .LBB15_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB15_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, v4, v7
+; GFX942-NEXT:    v_add_f16_e32 v6, v6, v5
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, v4, v6
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v11, v6
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_mov_b64_e32 v[8:9], v[6:7]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB15_4: ; Parent Loop BB15_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[8:9], v10, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v8
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v4, v8
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4798,47 +4798,47 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, s4, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:  .LBB16_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v5
-; GFX940-NEXT:    v_bfe_u32 v2, v0, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v3, 0x400000, v0
-; GFX940-NEXT:    v_add3_u32 v2, v2, v0, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB16_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:  .LBB16_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v5
+; GFX942-NEXT:    v_bfe_u32 v2, v0, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v3, 0x400000, v0
+; GFX942-NEXT:    v_add3_u32 v2, v2, v0, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB16_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5202,46 +5202,46 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v2, s4
-; GFX940-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:  .LBB17_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v3
-; GFX940-NEXT:    v_bfe_u32 v4, v0, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v5, 0x400000, v0
-; GFX940-NEXT:    v_add3_u32 v4, v4, v0, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v2, s4
+; GFX942-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:  .LBB17_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_bfe_u32 v4, v0, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v5, 0x400000, v0
+; GFX942-NEXT:    v_add3_u32 v4, v4, v0, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5634,80 +5634,80 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v7, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v4, 0x200, v4
-; GFX940-NEXT:    v_and_b32_e32 v9, -4, v4
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
-; GFX940-NEXT:    v_not_b32_e32 v10, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB18_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
-; GFX940-NEXT:    s_movk_i32 s10, 0x7fff
-; GFX940-NEXT:  .LBB18_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB18_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_add_f32_e32 v4, v4, v11
-; GFX940-NEXT:    v_bfe_u32 v5, v4, 16, 1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v4, s10
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v10, v4
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:  .LBB18_4: ; Parent Loop BB18_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB18_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v4, 0x200, v4
+; GFX942-NEXT:    v_and_b32_e32 v9, -4, v4
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
+; GFX942-NEXT:    v_not_b32_e32 v10, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB18_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
+; GFX942-NEXT:    s_movk_i32 s10, 0x7fff
+; GFX942-NEXT:  .LBB18_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB18_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_add_f32_e32 v4, v4, v11
+; GFX942-NEXT:    v_bfe_u32 v5, v4, 16, 1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v4, s10
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v10, v4
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:  .LBB18_4: ; Parent Loop BB18_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB18_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6256,15 +6256,15 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6505,15 +6505,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6756,33 +6756,33 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB21_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_pk_add_f16 v5, v4, s[4:7], 0 offen offset:1024 sc0
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, v5
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB21_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_pk_add_f16 v5, v4, s[4:7], 0 offen offset:1024 sc0
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, v5
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7235,15 +7235,15 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset(ptr addrsp
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset:
 ; GFX11:       ; %bb.0:
@@ -7501,15 +7501,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset(ptr addrspace(
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset:
 ; GFX11:       ; %bb.0:
@@ -7764,15 +7764,15 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -8030,15 +8030,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_rem
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -8297,50 +8297,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB26_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_add_f32_e32 v1, v1, v3
-; GFX940-NEXT:    v_bfe_u32 v5, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v1, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v1, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v6, v1, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB26_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB26_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_add_f32_e32 v1, v1, v3
+; GFX942-NEXT:    v_bfe_u32 v5, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v1, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v1, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v6, v1, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB26_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8680,49 +8680,49 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB27_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v3
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v7, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v5, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v6
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB27_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB27_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v3
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v6
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB27_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9075,83 +9075,83 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v8, 0x400, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB28_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB28_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v9, 16, v5
-; GFX940-NEXT:    s_movk_i32 s10, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v10, 0xffff0000, v5
-; GFX940-NEXT:    s_mov_b32 s11, 0x7060302
-; GFX940-NEXT:  .LBB28_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB28_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
-; GFX940-NEXT:    v_add_f32_e32 v4, v4, v9
-; GFX940-NEXT:    v_bfe_u32 v5, v4, 16, 1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v4, s10
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v10
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s10
-; GFX940-NEXT:    v_or_b32_e32 v11, 0x400000, v5
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v11, vcc
-; GFX940-NEXT:    v_perm_b32 v6, v5, v4, s11
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:  .LBB28_4: ; Parent Loop BB28_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB28_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB28_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB28_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v8, 0x400, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB28_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB28_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v9, 16, v5
+; GFX942-NEXT:    s_movk_i32 s10, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v10, 0xffff0000, v5
+; GFX942-NEXT:    s_mov_b32 s11, 0x7060302
+; GFX942-NEXT:  .LBB28_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB28_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
+; GFX942-NEXT:    v_add_f32_e32 v4, v4, v9
+; GFX942-NEXT:    v_bfe_u32 v5, v4, 16, 1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v4, s10
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v10
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s10
+; GFX942-NEXT:    v_or_b32_e32 v11, 0x400000, v5
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v11, vcc
+; GFX942-NEXT:    v_perm_b32 v6, v5, v4, s11
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:  .LBB28_4: ; Parent Loop BB28_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB28_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB28_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB28_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9713,50 +9713,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset(ptr add
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB29_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_add_f32_e32 v1, v1, v3
-; GFX940-NEXT:    v_bfe_u32 v5, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v1, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v1, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v6, v1, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB29_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB29_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_add_f32_e32 v1, v1, v3
+; GFX942-NEXT:    v_bfe_u32 v5, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v1, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v1, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v6, v1, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB29_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset:
 ; GFX11:       ; %bb.0:
@@ -10096,49 +10096,49 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset(ptr addrspace
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB30_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v3
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v7, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v5, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v6
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB30_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB30_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v3
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v6
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB30_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset:
 ; GFX11:       ; %bb.0:
@@ -10470,50 +10470,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB31_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_add_f32_e32 v1, v1, v3
-; GFX940-NEXT:    v_bfe_u32 v5, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v1, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v1, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v6, v1, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB31_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB31_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_add_f32_e32 v1, v1, v3
+; GFX942-NEXT:    v_bfe_u32 v5, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v1, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v1, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v6, v1, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB31_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -10853,49 +10853,49 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_re
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB32_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v3
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v7, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v5, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v6
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB32_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB32_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v3
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v6
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB32_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -11227,49 +11227,49 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB33_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
-; GFX940-NEXT:    v_add_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v3
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v7, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v5, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v6
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB33_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB33_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX942-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v3
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v6
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB33_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -11606,15 +11606,15 @@ define float @buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll
index 7f06d169a6b1..38adf60888ec 100644
--- llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -28,34 +28,34 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB0_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB0_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB0_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB0_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -203,33 +203,33 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB1_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v0, v1, v1
-; GFX940-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB1_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB1_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v0, v1, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB1_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -395,66 +395,66 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgp
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v8, 0x400, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_max_f32_e32 v9, v5, v5
-; GFX940-NEXT:  .LBB2_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB2_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v4, v7, v7
-; GFX940-NEXT:    v_max_f32_e32 v6, v4, v9
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB2_4: ; Parent Loop BB2_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB2_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v8, 0x400, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_max_f32_e32 v9, v5, v5
+; GFX942-NEXT:  .LBB2_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB2_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v4, v7, v7
+; GFX942-NEXT:    v_max_f32_e32 v6, v4, v9
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB2_4: ; Parent Loop BB2_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB2_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -766,34 +766,34 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB3_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB3_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB3_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB3_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1023,34 +1023,34 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB4_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1226,15 +1226,15 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1417,15 +1417,15 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1643,36 +1643,36 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdg
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v6
-; GFX940-NEXT:    v_mov_b32_e32 v6, v5
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_max_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB7_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v6
+; GFX942-NEXT:    v_mov_b32_e32 v6, v5
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_max_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB7_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2033,15 +2033,15 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remot
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -2320,15 +2320,15 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -2529,41 +2529,41 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, s4, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f16_e32 v5, v0, v0
-; GFX940-NEXT:  .LBB10_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v1
-; GFX940-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX940-NEXT:    v_max_f16_e32 v0, v0, v5
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, s6, v0
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB10_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f16_e32 v5, v0, v0
+; GFX942-NEXT:  .LBB10_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v1
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v5
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, s6, v0
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB10_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2892,40 +2892,40 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v2, s4
-; GFX940-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f16_e32 v3, v0, v0
-; GFX940-NEXT:  .LBB11_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v1
-; GFX940-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX940-NEXT:    v_max_f16_e32 v0, v0, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, s6, v0
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB11_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v2, s4
+; GFX942-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f16_e32 v3, v0, v0
+; GFX942-NEXT:  .LBB11_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v1
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, s6, v0
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB11_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3283,74 +3283,74 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v7, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v4, 0x200, v4
-; GFX940-NEXT:    v_and_b32_e32 v9, -4, v4
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
-; GFX940-NEXT:    v_not_b32_e32 v10, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB12_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_max_f16_e32 v11, v5, v5
-; GFX940-NEXT:  .LBB12_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB12_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v8, v7
-; GFX940-NEXT:    v_max_f16_e32 v4, v4, v4
-; GFX940-NEXT:    v_max_f16_e32 v4, v4, v11
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v8, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v10, v4
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB12_4: ; Parent Loop BB12_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB12_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v4, 0x200, v4
+; GFX942-NEXT:    v_and_b32_e32 v9, -4, v4
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
+; GFX942-NEXT:    v_not_b32_e32 v10, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB12_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_max_f16_e32 v11, v5, v5
+; GFX942-NEXT:  .LBB12_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB12_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v8, v7
+; GFX942-NEXT:    v_max_f16_e32 v4, v4, v4
+; GFX942-NEXT:    v_max_f16_e32 v4, v4, v11
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v8, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v10, v4
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB12_4: ; Parent Loop BB12_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB12_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3915,47 +3915,47 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, s4, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_max_f32_e32 v0, v0, v5
-; GFX940-NEXT:    v_bfe_u32 v2, v0, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v3, 0x400000, v0
-; GFX940-NEXT:    v_add3_u32 v2, v2, v0, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v5
+; GFX942-NEXT:    v_bfe_u32 v2, v0, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v3, 0x400000, v0
+; GFX942-NEXT:    v_add3_u32 v2, v2, v0, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4321,46 +4321,46 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v2, s4
-; GFX940-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:  .LBB14_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_max_f32_e32 v0, v0, v3
-; GFX940-NEXT:    v_bfe_u32 v4, v0, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v5, 0x400000, v0
-; GFX940-NEXT:    v_add3_u32 v4, v4, v0, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB14_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v2, s4
+; GFX942-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:  .LBB14_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_bfe_u32 v4, v0, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v5, 0x400000, v0
+; GFX942-NEXT:    v_add3_u32 v4, v4, v0, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB14_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4755,80 +4755,80 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v7, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v4, 0x200, v4
-; GFX940-NEXT:    v_and_b32_e32 v9, -4, v4
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
-; GFX940-NEXT:    v_not_b32_e32 v10, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB15_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
-; GFX940-NEXT:    s_movk_i32 s10, 0x7fff
-; GFX940-NEXT:  .LBB15_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB15_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_max_f32_e32 v4, v4, v11
-; GFX940-NEXT:    v_bfe_u32 v5, v4, 16, 1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v4, s10
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v10, v4
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:  .LBB15_4: ; Parent Loop BB15_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v4, 0x200, v4
+; GFX942-NEXT:    v_and_b32_e32 v9, -4, v4
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
+; GFX942-NEXT:    v_not_b32_e32 v10, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB15_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
+; GFX942-NEXT:    s_movk_i32 s10, 0x7fff
+; GFX942-NEXT:  .LBB15_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB15_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_max_f32_e32 v4, v4, v11
+; GFX942-NEXT:    v_bfe_u32 v5, v4, 16, 1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v4, s10
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v10, v4
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:  .LBB15_4: ; Parent Loop BB15_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5402,35 +5402,35 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB16_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_pk_max_f16 v0, v5, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB16_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB16_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_pk_max_f16 v0, v5, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_pk_max_f16 v4, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB16_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5720,34 +5720,34 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fin
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB17_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v0, v1, v1
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB17_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v0, v1, v1
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6071,66 +6071,66 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v8, 0x400, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB18_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_pk_max_f16 v9, v5, v5
-; GFX940-NEXT:  .LBB18_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB18_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v4, v7, v7
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_pk_max_f16 v6, v4, v9
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:  .LBB18_4: ; Parent Loop BB18_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB18_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v8, 0x400, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB18_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_pk_max_f16 v9, v5, v5
+; GFX942-NEXT:  .LBB18_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB18_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v4, v7, v7
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_pk_max_f16 v6, v4, v9
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:  .LBB18_4: ; Parent Loop BB18_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB18_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6669,50 +6669,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB19_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
-; GFX940-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_max_f32_e32 v1, v1, v3
-; GFX940-NEXT:    v_bfe_u32 v5, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v1, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v1, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v6, v1, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB19_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX942-NEXT:    v_bfe_u32 v5, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v1, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v1, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v6, v1, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7089,49 +7089,49 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fi
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB20_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
-; GFX940-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v3
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v7, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v5, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v6
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB20_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v3
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v6
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7542,83 +7542,83 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v8, 0x400, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB21_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v9, 16, v5
-; GFX940-NEXT:    s_movk_i32 s10, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v10, 0xffff0000, v5
-; GFX940-NEXT:    s_mov_b32 s11, 0x7060302
-; GFX940-NEXT:  .LBB21_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB21_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
-; GFX940-NEXT:    v_max_f32_e32 v4, v4, v9
-; GFX940-NEXT:    v_bfe_u32 v5, v4, 16, 1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v4, s10
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v10
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s10
-; GFX940-NEXT:    v_or_b32_e32 v11, 0x400000, v5
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v11, vcc
-; GFX940-NEXT:    v_perm_b32 v6, v5, v4, s11
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:  .LBB21_4: ; Parent Loop BB21_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB21_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v8, 0x400, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB21_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v9, 16, v5
+; GFX942-NEXT:    s_movk_i32 s10, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v10, 0xffff0000, v5
+; GFX942-NEXT:    s_mov_b32 s11, 0x7060302
+; GFX942-NEXT:  .LBB21_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB21_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
+; GFX942-NEXT:    v_max_f32_e32 v4, v4, v9
+; GFX942-NEXT:    v_bfe_u32 v5, v4, 16, 1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v4, s10
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v10
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s10
+; GFX942-NEXT:    v_or_b32_e32 v11, 0x400000, v5
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v11, vcc
+; GFX942-NEXT:    v_perm_b32 v6, v5, v4, s11
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:  .LBB21_4: ; Parent Loop BB21_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB21_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8185,34 +8185,34 @@ define float @buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB22_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB22_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB22_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB22_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll
index a6eb81fcbf51..2b8cea9068d8 100644
--- llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -28,34 +28,34 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB0_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB0_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB0_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB0_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -203,33 +203,33 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB1_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v0, v1, v1
-; GFX940-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB1_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB1_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v0, v1, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB1_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -395,66 +395,66 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgp
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v8, 0x400, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_max_f32_e32 v9, v5, v5
-; GFX940-NEXT:  .LBB2_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB2_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v4, v7, v7
-; GFX940-NEXT:    v_min_f32_e32 v6, v4, v9
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB2_4: ; Parent Loop BB2_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB2_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v8, 0x400, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_max_f32_e32 v9, v5, v5
+; GFX942-NEXT:  .LBB2_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB2_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v4, v7, v7
+; GFX942-NEXT:    v_min_f32_e32 v6, v4, v9
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB2_4: ; Parent Loop BB2_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB2_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -766,34 +766,34 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB3_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB3_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB3_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB3_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1023,34 +1023,34 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB4_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1226,15 +1226,15 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1417,15 +1417,15 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1643,36 +1643,36 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdg
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v6
-; GFX940-NEXT:    v_mov_b32_e32 v6, v5
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_min_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB7_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v6
+; GFX942-NEXT:    v_mov_b32_e32 v6, v5
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_min_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB7_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2033,15 +2033,15 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remot
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -2320,15 +2320,15 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s16
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s16
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -2529,41 +2529,41 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, s4, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f16_e32 v5, v0, v0
-; GFX940-NEXT:  .LBB10_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v1
-; GFX940-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX940-NEXT:    v_min_f16_e32 v0, v0, v5
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, s6, v0
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB10_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f16_e32 v5, v0, v0
+; GFX942-NEXT:  .LBB10_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v1
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v5
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, s6, v0
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB10_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2892,40 +2892,40 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v2, s4
-; GFX940-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f16_e32 v3, v0, v0
-; GFX940-NEXT:  .LBB11_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v1
-; GFX940-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX940-NEXT:    v_min_f16_e32 v0, v0, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, s6, v0
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB11_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v2, s4
+; GFX942-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f16_e32 v3, v0, v0
+; GFX942-NEXT:  .LBB11_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v1
+; GFX942-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX942-NEXT:    v_min_f16_e32 v0, v0, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, s6, v0
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB11_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3283,74 +3283,74 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v7, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v4, 0x200, v4
-; GFX940-NEXT:    v_and_b32_e32 v9, -4, v4
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
-; GFX940-NEXT:    v_not_b32_e32 v10, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB12_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_max_f16_e32 v11, v5, v5
-; GFX940-NEXT:  .LBB12_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB12_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v8, v7
-; GFX940-NEXT:    v_max_f16_e32 v4, v4, v4
-; GFX940-NEXT:    v_min_f16_e32 v4, v4, v11
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v8, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v10, v4
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:  .LBB12_4: ; Parent Loop BB12_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB12_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v4, 0x200, v4
+; GFX942-NEXT:    v_and_b32_e32 v9, -4, v4
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
+; GFX942-NEXT:    v_not_b32_e32 v10, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB12_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_max_f16_e32 v11, v5, v5
+; GFX942-NEXT:  .LBB12_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB12_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v8, v7
+; GFX942-NEXT:    v_max_f16_e32 v4, v4, v4
+; GFX942-NEXT:    v_min_f16_e32 v4, v4, v11
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v8, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v10, v4
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:  .LBB12_4: ; Parent Loop BB12_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB12_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3915,47 +3915,47 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, s4, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_min_f32_e32 v0, v0, v5
-; GFX940-NEXT:    v_bfe_u32 v2, v0, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v3, 0x400000, v0
-; GFX940-NEXT:    v_add3_u32 v2, v2, v0, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:    buffer_load_dword v1, v4, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v5
+; GFX942-NEXT:    v_bfe_u32 v2, v0, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v3, 0x400000, v0
+; GFX942-NEXT:    v_add3_u32 v2, v2, v0, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, s6, v2
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4321,46 +4321,46 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s16, 0x200
-; GFX940-NEXT:    s_and_b32 s4, s16, -4
-; GFX940-NEXT:    v_mov_b32_e32 v2, s4
-; GFX940-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_and_b32 s4, s16, 3
-; GFX940-NEXT:    s_lshl_b32 s6, s4, 3
-; GFX940-NEXT:    s_lshl_b32 s4, 0xffff, s6
-; GFX940-NEXT:    s_not_b32 s7, s4
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:  .LBB14_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_min_f32_e32 v0, v0, v3
-; GFX940-NEXT:    v_bfe_u32 v4, v0, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v5, 0x400000, v0
-; GFX940-NEXT:    v_add3_u32 v4, v4, v0, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v0, v1, s7, v0
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB14_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s16, 0x200
+; GFX942-NEXT:    s_and_b32 s4, s16, -4
+; GFX942-NEXT:    v_mov_b32_e32 v2, s4
+; GFX942-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_and_b32 s4, s16, 3
+; GFX942-NEXT:    s_lshl_b32 s6, s4, 3
+; GFX942-NEXT:    s_lshl_b32 s4, 0xffff, s6
+; GFX942-NEXT:    s_not_b32 s7, s4
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:  .LBB14_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX942-NEXT:    v_bfe_u32 v4, v0, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v5, 0x400000, v0
+; GFX942-NEXT:    v_add3_u32 v4, v4, v0, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v0, v1, s7, v0
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB14_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4755,80 +4755,80 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v7, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v4, 0x200, v4
-; GFX940-NEXT:    v_and_b32_e32 v9, -4, v4
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
-; GFX940-NEXT:    v_not_b32_e32 v10, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB15_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
-; GFX940-NEXT:    s_movk_i32 s10, 0x7fff
-; GFX940-NEXT:  .LBB15_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB15_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_min_f32_e32 v4, v4, v11
-; GFX940-NEXT:    v_bfe_u32 v5, v4, 16, 1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v4, s10
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v10, v4
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:  .LBB15_4: ; Parent Loop BB15_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v4, 0x200, v4
+; GFX942-NEXT:    v_and_b32_e32 v9, -4, v4
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v8, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v8, s0
+; GFX942-NEXT:    v_not_b32_e32 v10, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB15_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v9, s[4:7], 0 offen
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
+; GFX942-NEXT:    s_movk_i32 s10, 0x7fff
+; GFX942-NEXT:  .LBB15_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB15_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_min_f32_e32 v4, v4, v11
+; GFX942-NEXT:    v_bfe_u32 v5, v4, 16, 1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v4, s10
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v10, v4
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:  .LBB15_4: ; Parent Loop BB15_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB15_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v8, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5402,35 +5402,35 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB16_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_pk_max_f16 v0, v5, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB16_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB16_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_pk_max_f16 v0, v5, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_pk_min_f16 v4, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB16_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5720,34 +5720,34 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fin
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB17_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v0, v1, v1
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB17_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v0, v1, v1
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6071,66 +6071,66 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v8, 0x400, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB18_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_pk_max_f16 v9, v5, v5
-; GFX940-NEXT:  .LBB18_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB18_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v4, v7, v7
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    v_pk_min_f16 v6, v4, v9
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:  .LBB18_4: ; Parent Loop BB18_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB18_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v8, 0x400, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB18_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_pk_max_f16 v9, v5, v5
+; GFX942-NEXT:  .LBB18_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB18_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v4, v7, v7
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    v_pk_min_f16 v6, v4, v9
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:  .LBB18_4: ; Parent Loop BB18_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB18_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6669,50 +6669,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB19_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
-; GFX940-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_min_f32_e32 v1, v1, v3
-; GFX940-NEXT:    v_bfe_u32 v5, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v1, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v1, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v6, v1, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB19_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff0000, v7
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX942-NEXT:    v_bfe_u32 v5, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v1, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v1, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v5, v6, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v6, v1, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[6:7]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7089,49 +7089,49 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fi
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s16
-; GFX940-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s4, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[6:7], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX940-NEXT:    s_movk_i32 s8, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
-; GFX940-NEXT:    s_mov_b32 s9, 0x7060302
-; GFX940-NEXT:    v_mov_b32_e32 v4, s4
-; GFX940-NEXT:  .LBB20_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
-; GFX940-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v3
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v8, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v7, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v9, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s8
-; GFX940-NEXT:    v_add3_u32 v8, v8, v5, s8
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s9
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
-; GFX940-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
-; GFX940-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v1, v6
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s16
+; GFX942-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s4, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[6:7], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX942-NEXT:    s_movk_i32 s8, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v0
+; GFX942-NEXT:    s_mov_b32 s9, 0x7060302
+; GFX942-NEXT:    v_mov_b32_e32 v4, s4
+; GFX942-NEXT:  .LBB20_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v3
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v8, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v7, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v9, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s8
+; GFX942-NEXT:    v_add3_u32 v8, v8, v5, s8
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[4:5], v0, v0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v7, s[4:5]
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s9
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-NEXT:    buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v1
+; GFX942-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v1, v6
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7542,83 +7542,83 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v8, 0x400, v4
-; GFX940-NEXT:    s_mov_b64 s[2:3], exec
-; GFX940-NEXT:  .LBB21_1: ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
-; GFX940-NEXT:    ; implicit-def: $vgpr4
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_1
-; GFX940-NEXT:  ; %bb.2:
-; GFX940-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v9, 16, v5
-; GFX940-NEXT:    s_movk_i32 s10, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v10, 0xffff0000, v5
-; GFX940-NEXT:    s_mov_b32 s11, 0x7060302
-; GFX940-NEXT:  .LBB21_3: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Loop Header: Depth=1
-; GFX940-NEXT:    ; Child Loop BB21_4 Depth 2
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
-; GFX940-NEXT:    v_min_f32_e32 v4, v4, v9
-; GFX940-NEXT:    v_bfe_u32 v5, v4, 16, 1
-; GFX940-NEXT:    v_add3_u32 v5, v5, v4, s10
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    s_mov_b64 s[8:9], exec
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v10
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s10
-; GFX940-NEXT:    v_or_b32_e32 v11, 0x400000, v5
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v11, vcc
-; GFX940-NEXT:    v_perm_b32 v6, v5, v4, s11
-; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
-; GFX940-NEXT:  .LBB21_4: ; Parent Loop BB21_3 Depth=1
-; GFX940-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX940-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX940-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX940-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
-; GFX940-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
-; GFX940-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_4
-; GFX940-NEXT:  ; %bb.5: ; in Loop: Header=BB21_3 Depth=1
-; GFX940-NEXT:    s_mov_b64 exec, s[8:9]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_3
-; GFX940-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v8, 0x400, v4
+; GFX942-NEXT:    s_mov_b64 s[2:3], exec
+; GFX942-NEXT:  .LBB21_1: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024
+; GFX942-NEXT:    ; implicit-def: $vgpr4
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_1
+; GFX942-NEXT:  ; %bb.2:
+; GFX942-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v9, 16, v5
+; GFX942-NEXT:    s_movk_i32 s10, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v10, 0xffff0000, v5
+; GFX942-NEXT:    s_mov_b32 s11, 0x7060302
+; GFX942-NEXT:  .LBB21_3: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Loop Header: Depth=1
+; GFX942-NEXT:    ; Child Loop BB21_4 Depth 2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
+; GFX942-NEXT:    v_min_f32_e32 v4, v4, v9
+; GFX942-NEXT:    v_bfe_u32 v5, v4, 16, 1
+; GFX942-NEXT:    v_add3_u32 v5, v5, v4, s10
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    s_mov_b64 s[8:9], exec
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v6, vcc
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v10
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s10
+; GFX942-NEXT:    v_or_b32_e32 v11, 0x400000, v5
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v11, vcc
+; GFX942-NEXT:    v_perm_b32 v6, v5, v4, s11
+; GFX942-NEXT:    v_mov_b64_e32 v[4:5], v[6:7]
+; GFX942-NEXT:  .LBB21_4: ; Parent Loop BB21_3 Depth=1
+; GFX942-NEXT:    ; => This Inner Loop Header: Depth=2
+; GFX942-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX942-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX942-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3]
+; GFX942-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0
+; GFX942-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_4
+; GFX942-NEXT:  ; %bb.5: ; in Loop: Header=BB21_3 Depth=1
+; GFX942-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_3
+; GFX942-NEXT:  ; %bb.6: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8185,34 +8185,34 @@ define float @buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v0, s16
-; GFX940-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
-; GFX940-NEXT:    s_add_i32 s6, s16, 0x400
-; GFX940-NEXT:    s_mov_b64 s[4:5], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, s6
-; GFX940-NEXT:  .LBB22_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
-; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_cbranch_execnz .LBB22_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v0, s16
+; GFX942-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024
+; GFX942-NEXT:    s_add_i32 s6, s16, 0x400
+; GFX942-NEXT:    s_mov_b64 s[4:5], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, s6
+; GFX942-NEXT:  .LBB22_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
+; GFX942-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_cbranch_execnz .LBB22_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/build_vector.ll llvm/test/CodeGen/AMDGPU/build_vector.ll
index 4ab940288e8c..84bd682f623e 100644
--- llvm/test/CodeGen/AMDGPU/build_vector.ll
+++ llvm/test/CodeGen/AMDGPU/build_vector.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga | FileCheck %s --check-prefixes=GFX8,GCN
 ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 | FileCheck %s --check-prefixes=GFX10,GCN
 ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 | FileCheck %s --check-prefixes=GFX11,GCN
-; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 | FileCheck %s --check-prefixes=GFX942,GCN
 
 define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
 ; GFX6-LABEL: build_vector2:
@@ -48,15 +48,15 @@ define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: build_vector2:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, 5
-; GFX940-NEXT:    v_mov_b32_e32 v1, 6
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: build_vector2:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    v_mov_b32_e32 v0, 5
+; GFX942-NEXT:    v_mov_b32_e32 v1, 6
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 entry:
   store <2 x i32> <i32 5, i32 6>, ptr addrspace(1) %out
   ret void
@@ -113,17 +113,17 @@ define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
 ; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: build_vector4:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0
-; GFX940-NEXT:    v_mov_b32_e32 v0, 5
-; GFX940-NEXT:    v_mov_b32_e32 v1, 6
-; GFX940-NEXT:    v_mov_b32_e32 v2, 7
-; GFX940-NEXT:    v_mov_b32_e32 v3, 8
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: build_vector4:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0
+; GFX942-NEXT:    v_mov_b32_e32 v0, 5
+; GFX942-NEXT:    v_mov_b32_e32 v1, 6
+; GFX942-NEXT:    v_mov_b32_e32 v2, 7
+; GFX942-NEXT:    v_mov_b32_e32 v3, 8
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 entry:
   store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr addrspace(1) %out
   ret void
@@ -168,14 +168,14 @@ define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: build_vector_v2i16:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 0x60005
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: build_vector_v2i16:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x60005
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 entry:
   store <2 x i16> <i16 5, i16 6>, ptr addrspace(1) %out
   ret void
@@ -232,17 +232,17 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: build_vector_v2i16_trunc:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshr_b32 s2, s2, 16
-; GFX940-NEXT:    s_pack_ll_b32_b16 s2, s2, 5
-; GFX940-NEXT:    v_mov_b32_e32 v1, s2
-; GFX940-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: build_vector_v2i16_trunc:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX942-NEXT:    s_pack_ll_b32_b16 s2, s2, 5
+; GFX942-NEXT:    v_mov_b32_e32 v1, s2
+; GFX942-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
   %srl = lshr i32 %a, 16
   %trunc = trunc i32 %srl to i16
   %ins.0 = insertelement <2 x i16> undef, i16 %trunc, i32 0
@@ -304,17 +304,17 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out,
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: build_v2i32_from_v4i16_shuffle:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s3, s3, 16
-; GFX940-NEXT:    s_lshl_b32 s2, s2, 16
-; GFX940-NEXT:    v_mov_b32_e32 v0, s2
-; GFX940-NEXT:    v_mov_b32_e32 v1, s3
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: build_v2i32_from_v4i16_shuffle:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s3, s3, 16
+; GFX942-NEXT:    s_lshl_b32 s2, s2, 16
+; GFX942-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-NEXT:    v_mov_b32_e32 v1, s3
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 entry:
   %shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2>
   %zextended = zext <2 x i16> %shuf to <2 x i32>
diff --git llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
index 7c21b3e08580..d5dfb5dd0848 100644
--- llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
+++ llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX908 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX90A %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX940 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX942 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
 
@@ -22,10 +22,10 @@ body: |
     ; GFX90A-NEXT: {{  $}}
     ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v64_to_v64
-    ; GFX940: liveins: $vgpr2_vgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
+    ; GFX942-LABEL: name: copy_v64_to_v64
+    ; GFX942: liveins: $vgpr2_vgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v64_to_v64
     ; GFX10: liveins: $vgpr2_vgpr3
@@ -52,10 +52,10 @@ body: |
     ; GFX90A-NEXT: {{  $}}
     ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $sgpr2_sgpr3, 12, $sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_s64_to_v64
-    ; GFX940: liveins: $sgpr2_sgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $sgpr2_sgpr3, implicit $exec, implicit $exec
+    ; GFX942-LABEL: name: copy_s64_to_v64
+    ; GFX942: liveins: $sgpr2_sgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $sgpr2_sgpr3, implicit $exec, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_s64_to_v64
     ; GFX10: liveins: $sgpr2_sgpr3
@@ -83,11 +83,11 @@ body: |
     ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3
     ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_a64_to_v64
-    ; GFX940: liveins: $agpr2_agpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3
-    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec
+    ; GFX942-LABEL: name: copy_a64_to_v64
+    ; GFX942: liveins: $agpr2_agpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3
+    ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_a64_to_v64
     ; GFX10: liveins: $agpr2_agpr3
@@ -117,11 +117,11 @@ body: |
     ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX90A-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr4_vgpr5, 12, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v128_to_v128_fwd
-    ; GFX940: liveins: $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX940-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr4_vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec
+    ; GFX942-LABEL: name: copy_v128_to_v128_fwd
+    ; GFX942: liveins: $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr4_vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v128_to_v128_fwd
     ; GFX10: liveins: $vgpr2_vgpr3_vgpr4_vgpr5
@@ -153,11 +153,11 @@ body: |
     ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX90A-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr0_vgpr1, 12, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v128_to_v128_back
-    ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GFX940-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
+    ; GFX942-LABEL: name: copy_v128_to_v128_back
+    ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v128_to_v128_back
     ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
@@ -189,12 +189,12 @@ body: |
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6
     ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v96_to_v96
-    ; GFX940: liveins: $vgpr4_vgpr5_vgpr6
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr4_vgpr5_vgpr6
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec
+    ; GFX942-LABEL: name: copy_v96_to_v96
+    ; GFX942: liveins: $vgpr4_vgpr5_vgpr6
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr4_vgpr5_vgpr6
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v96_to_v96
     ; GFX10: liveins: $vgpr4_vgpr5_vgpr6
@@ -222,10 +222,10 @@ body: |
     ; GFX90A-NEXT: {{  $}}
     ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v64_to_v64_undef_sub0
-    ; GFX940: liveins: $vgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
+    ; GFX942-LABEL: name: copy_v64_to_v64_undef_sub0
+    ; GFX942: liveins: $vgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v64_to_v64_undef_sub0
     ; GFX10: liveins: $vgpr3
@@ -252,10 +252,10 @@ body: |
     ; GFX90A-NEXT: {{  $}}
     ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v64_to_v64_undef_sub1
-    ; GFX940: liveins: $vgpr2
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
+    ; GFX942-LABEL: name: copy_v64_to_v64_undef_sub1
+    ; GFX942: liveins: $vgpr2
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v64_to_v64_undef_sub1
     ; GFX10: liveins: $vgpr2
@@ -285,11 +285,11 @@ body: |
     ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $sgpr4_sgpr5, 12, $sgpr4_sgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX90A-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, $sgpr6_sgpr7, 12, $sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7
     ;
-    ; GFX940-LABEL: name: copy_s128_to_v128_killed
-    ; GFX940: liveins: $sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr4_sgpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX940-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $sgpr6_sgpr7, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-LABEL: name: copy_s128_to_v128_killed
+    ; GFX942: liveins: $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr4_sgpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $sgpr6_sgpr7, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7
     ;
     ; GFX10-LABEL: name: copy_s128_to_v128_killed
     ; GFX10: liveins: $sgpr4_sgpr5_sgpr6_sgpr7
@@ -319,11 +319,11 @@ body: |
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
     ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v64_to_v64_unaligned
-    ; GFX940: liveins: $vgpr2_vgpr3
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
+    ; GFX942-LABEL: name: copy_v64_to_v64_unaligned
+    ; GFX942: liveins: $vgpr2_vgpr3
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v64_to_v64_unaligned
     ; GFX10: liveins: $vgpr2_vgpr3
@@ -351,11 +351,11 @@ body: |
     ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v64_unaligned_to_v64
-    ; GFX940: liveins: $vgpr3_vgpr4
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
+    ; GFX942-LABEL: name: copy_v64_unaligned_to_v64
+    ; GFX942: liveins: $vgpr3_vgpr4
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v64_unaligned_to_v64
     ; GFX10: liveins: $vgpr3_vgpr4
@@ -387,13 +387,13 @@ body: |
     ; GFX90A-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
     ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v128_to_v128_unaligned
-    ; GFX940: liveins: $vgpr8_vgpr9_vgpr10_vgpr11
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
-    ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
-    ; GFX940-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
+    ; GFX942-LABEL: name: copy_v128_to_v128_unaligned
+    ; GFX942: liveins: $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX942-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v128_to_v128_unaligned
     ; GFX10: liveins: $vgpr8_vgpr9_vgpr10_vgpr11
@@ -427,13 +427,13 @@ body: |
     ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
     ; GFX90A-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v128_unaligned_to_v128
-    ; GFX940: liveins: $vgpr7_vgpr8_vgpr9_vgpr10
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
-    ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
+    ; GFX942-LABEL: name: copy_v128_unaligned_to_v128
+    ; GFX942: liveins: $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v128_unaligned_to_v128
     ; GFX10: liveins: $vgpr7_vgpr8_vgpr9_vgpr10
@@ -463,11 +463,11 @@ body: |
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
     ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_s64_to_v64_unaligned
-    ; GFX940: liveins: $sgpr8_sgpr9
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
+    ; GFX942-LABEL: name: copy_s64_to_v64_unaligned
+    ; GFX942: liveins: $sgpr8_sgpr9
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_s64_to_v64_unaligned
     ; GFX10: liveins: $sgpr8_sgpr9
@@ -499,13 +499,13 @@ body: |
     ; GFX90A-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
     ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_s128_to_v128_unaligned
-    ; GFX940: liveins: $sgpr8_sgpr9_sgpr10_sgpr11
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
-    ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
-    ; GFX940-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
+    ; GFX942-LABEL: name: copy_s128_to_v128_unaligned
+    ; GFX942: liveins: $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX942-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_s128_to_v128_unaligned
     ; GFX10: liveins: $sgpr8_sgpr9_sgpr10_sgpr11
@@ -537,12 +537,12 @@ body: |
     ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
     ; GFX90A-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v96_to_v96_unaligned
-    ; GFX940: liveins: $vgpr8_vgpr9_vgpr10
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
-    ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
+    ; GFX942-LABEL: name: copy_v96_to_v96_unaligned
+    ; GFX942: liveins: $vgpr8_vgpr9_vgpr10
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
+    ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v96_to_v96_unaligned
     ; GFX10: liveins: $vgpr8_vgpr9_vgpr10
@@ -573,12 +573,12 @@ body: |
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
     ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_v96_unaligned_to_v96
-    ; GFX940: liveins: $vgpr7_vgpr8_vgpr9
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
+    ; GFX942-LABEL: name: copy_v96_unaligned_to_v96
+    ; GFX942: liveins: $vgpr7_vgpr8_vgpr9
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_v96_unaligned_to_v96
     ; GFX10: liveins: $vgpr7_vgpr8_vgpr9
@@ -609,12 +609,12 @@ body: |
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
     ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_s96_to_v96
-    ; GFX940: liveins: $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+    ; GFX942-LABEL: name: copy_s96_to_v96
+    ; GFX942: liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_s96_to_v96
     ; GFX10: liveins: $sgpr0_sgpr1_sgpr2
@@ -645,12 +645,12 @@ body: |
     ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
     ;
-    ; GFX940-LABEL: name: copy_s96_to_v96_unaligned
-    ; GFX940: liveins: $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+    ; GFX942-LABEL: name: copy_s96_to_v96_unaligned
+    ; GFX942: liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
     ;
     ; GFX10-LABEL: name: copy_s96_to_v96_unaligned
     ; GFX10: liveins: $sgpr0_sgpr1_sgpr2
diff --git llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index b64968c9336b..c954e1fe124e 100644
--- llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -71,12 +71,6 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=GFX90C %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=GFX90C-NOXNACK %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=GFX90C-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX940-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX940-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 < %s | FileCheck --check-prefixes=GFX941 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX941-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX941-XNACK %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s
@@ -174,12 +168,6 @@
 ; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
 ; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack-"
 ; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack+"
-; GFX940: .amdgcn_target "amdgcn-amd-amdhsa--gfx940"
-; GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack-"
-; GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack+"
-; GFX941: .amdgcn_target "amdgcn-amd-amdhsa--gfx941"
-; GFX941-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack-"
-; GFX941-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack+"
 ; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942"
 ; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-"
 ; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+"
diff --git llvm/test/CodeGen/AMDGPU/dpp64_combine.ll llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
index c7422786d344..34b794705e98 100644
--- llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
+++ llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,GFX940
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,GFX942
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS
 
@@ -74,7 +74,7 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
 ; DPP64: v_mov_b32_dpp
 ; GFX90A: v_add_co_u32_e32
 ; GFX90A: v_addc_co_u32_e32
-; GFX940: v_lshl_add_u64
+; GFX942: v_lshl_add_u64
 ; GFX10PLUS: v_mov_b32_dpp
 ; GFX10PLUS: v_add_co_u32
 ; GFX10PLUS: v_add_co_ci_u32_e32
diff --git llvm/test/CodeGen/AMDGPU/dpp64_combine.mir llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index 9a6a54bbc4e4..84da231c95a6 100644
--- llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,5 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 
 ---
 # GCN-LABEL: name: dpp64_old_impdef
diff --git llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index 99344f16d4cd..65039b471694 100644
--- llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -54,8 +54,6 @@
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx909 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90A %s
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx90c < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90C %s
-; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
-; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX950 %s
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
@@ -137,8 +135,6 @@
 ; GFX909:        EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
 ; GFX90A:        EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
 ; GFX90C:        EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32)
-; GFX940:        EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
-; GFX941:        EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
 ; GFX942:        EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
 ; GFX950:        EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
 ; GFX1010:       EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
diff --git llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
index 3ad2a9df764b..cf69c10bcef7 100644
--- llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
+++ llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
@@ -9,8 +9,8 @@
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
 
-; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
-; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX942 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX942 %s
 
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
 ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
@@ -42,10 +42,10 @@
 ; SRAM-ECC-GFX90A:    EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
 ; SRAM-ECC-GFX90A:  ]
 
-; SRAM-ECC-GFX940: Flags [
-; SRAM-ECC-GFX940:    EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
-; SRAM-ECC-GFX940:    EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
-; SRAM-ECC-GFX940:  ]
+; SRAM-ECC-GFX942: Flags [
+; SRAM-ECC-GFX942:    EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+; SRAM-ECC-GFX942:    EF_AMDGPU_MACH_AMDGCN_GFX942 (0x40)
+; SRAM-ECC-GFX942:  ]
 
 ; SRAM-ECC-GFX950: Flags [
 ; SRAM-ECC-GFX950:    EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
diff --git llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
index 001a72e36097..2f8ad7f56478 100644
--- llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
+++ llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
@@ -5,7 +5,7 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s
 
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
 
diff --git llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir
index 1456bbc369b6..aecff1b13171 100644
--- llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir
+++ llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir
@@ -5,7 +5,7 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s
 
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
 
diff --git llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index b5a9f0271101..12e8d24cb367 100644
--- llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -6,7 +6,7 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=MUBUFW64,GFX10 %s
 
 # FIXME: Test in wave32
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX940 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX942 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX11 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX12 %s
 
@@ -512,14 +512,14 @@ body:             |
     ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr
-    ; GFX940: liveins: $sgpr8
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr
+    ; GFX942: liveins: $sgpr8
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr
     ; GFX11: liveins: $sgpr8
@@ -596,14 +596,14 @@ body:             |
     ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
-    ; GFX940: liveins: $sgpr8
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+    ; GFX942: liveins: $sgpr8
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
     ; GFX11: liveins: $sgpr8
@@ -681,13 +681,13 @@ body:             |
     ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
-    ; GFX940: liveins: $vgpr8
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
+    ; GFX942: liveins: $vgpr8
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
     ; GFX11: liveins: $vgpr8
@@ -765,13 +765,13 @@ body:             |
     ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
-    ; GFX940: liveins: $vgpr8
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
+    ; GFX942: liveins: $vgpr8
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
     ; GFX11: liveins: $vgpr8
@@ -849,13 +849,13 @@ body:             |
     ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
-    ; GFX940: liveins: $vgpr8
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
+    ; GFX942: liveins: $vgpr8
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
     ; GFX11: liveins: $vgpr8
@@ -1019,10 +1019,10 @@ body:             |
     ; GFX10-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
-    ; GFX940: $sgpr4 = S_MOV_B32 72
-    ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ; GFX942-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+    ; GFX942: $sgpr4 = S_MOV_B32 72
+    ; GFX942-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
     ; GFX11: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec
@@ -1109,17 +1109,17 @@ body:             |
     ; GFX10-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
-    ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX940-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
-    ; GFX940-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+    ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
+    ; GFX942-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX942-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX11: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
@@ -1714,12 +1714,12 @@ body:             |
     ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
     ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64__fi_sgpr_func
-    ; GFX940: liveins: $sgpr4
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ; GFX942-LABEL: name: v_add_co_u32_e64__fi_sgpr_func
+    ; GFX942: liveins: $sgpr4
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__fi_sgpr_func
     ; GFX11: liveins: $sgpr4
@@ -1902,14 +1902,14 @@ body:             |
     ; GFX10-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
     ; GFX10-NEXT: S_ENDPGM 0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
-    ; GFX940: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
-    ; GFX940-NEXT: S_ENDPGM 0
+    ; GFX942-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+    ; GFX942: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+    ; GFX942-NEXT: S_ENDPGM 0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
     ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
@@ -2006,14 +2006,14 @@ body:             |
     ; GFX10-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
     ; GFX10-NEXT: S_ENDPGM 0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
-    ; GFX940: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
-    ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
-    ; GFX940-NEXT: S_ENDPGM 0
+    ; GFX942-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+    ; GFX942: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+    ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+    ; GFX942-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+    ; GFX942-NEXT: S_ENDPGM 0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
     ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
@@ -2137,21 +2137,21 @@ body:             |
     ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
-    ; GFX940: liveins: $sgpr4, $sgpr5
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
-    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
-    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
-    ; GFX940-NEXT: $sgpr5 = frame-setup COPY $sgpr34
-    ; GFX940-NEXT: $sgpr34 = frame-setup COPY $sgpr32
-    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX940-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec
-    ; GFX940-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
-    ; GFX940-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
-    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX942: liveins: $sgpr4, $sgpr5
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34
+    ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32
+    ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec
+    ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
+    ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
+    ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
     ; GFX11: liveins: $sgpr4, $sgpr5
@@ -2290,21 +2290,21 @@ body:             |
     ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
-    ; GFX940: liveins: $sgpr4, $sgpr5
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
-    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
-    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
-    ; GFX940-NEXT: $sgpr5 = frame-setup COPY $sgpr34
-    ; GFX940-NEXT: $sgpr34 = frame-setup COPY $sgpr32
-    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX940-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
-    ; GFX940-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec
-    ; GFX940-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
-    ; GFX940-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
-    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX942: liveins: $sgpr4, $sgpr5
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34
+    ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32
+    ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX942-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec
+    ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
+    ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
+    ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
     ; GFX11: liveins: $sgpr4, $sgpr5
@@ -2444,21 +2444,21 @@ body:             |
     ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
-    ; GFX940: liveins: $sgpr4, $sgpr5
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
-    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
-    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
-    ; GFX940-NEXT: $sgpr5 = frame-setup COPY $sgpr34
-    ; GFX940-NEXT: $sgpr34 = frame-setup COPY $sgpr32
-    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX940-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
-    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec
-    ; GFX940-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
-    ; GFX940-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
-    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX942: liveins: $sgpr4, $sgpr5
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34
+    ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32
+    ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec
+    ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
+    ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
+    ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
     ; GFX11: liveins: $sgpr4, $sgpr5
@@ -2597,21 +2597,21 @@ body:             |
     ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
     ;
-    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
-    ; GFX940: liveins: $sgpr4, $sgpr5
-    ; GFX940-NEXT: {{  $}}
-    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
-    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
-    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
-    ; GFX940-NEXT: $sgpr5 = frame-setup COPY $sgpr34
-    ; GFX940-NEXT: $sgpr34 = frame-setup COPY $sgpr32
-    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX940-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
-    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec
-    ; GFX940-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
-    ; GFX940-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
-    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
-    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX942: liveins: $sgpr4, $sgpr5
+    ; GFX942-NEXT: {{  $}}
+    ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34
+    ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32
+    ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec
+    ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
+    ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
+    ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
     ; GFX11: liveins: $sgpr4, $sgpr5
diff --git llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index b7a5cf963138..6a4671058dc0 100644
--- llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUF %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUF %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
 
diff --git llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
index ef180cef7ed2..997432de6528 100644
--- llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
+++ llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
@@ -1,21 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
 
 define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
-  ; GFX940: bb.0 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX940-NEXT:   FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
+  ; GFX942: bb.0 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX942-NEXT:   FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+  ; GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
   ; GFX11: bb.0 (%ir-block.0):
@@ -33,18 +33,18 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
 }
 
 define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
-  ; GFX940: bb.0 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
-  ; GFX940-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
+  ; GFX942: bb.0 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX942-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+  ; GFX942-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
   ; GFX11: bb.0 (%ir-block.0):
@@ -63,17 +63,17 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
 }
 
 define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) #0 {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
-  ; GFX940: bb.0 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX940-NEXT:   FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
-  ; GFX940-NEXT:   S_ENDPGM 0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
+  ; GFX942: bb.0 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX942-NEXT:   FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
+  ; GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
   ; GFX11: bb.0 (%ir-block.0):
@@ -91,18 +91,18 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da
 }
 
 define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) #0 {
-  ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
-  ; GFX940: bb.0 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
-  ; GFX940-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
+  ; GFX942: bb.0 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX942-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
+  ; GFX942-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
   ; GFX11: bb.0 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
index d64becc74ddc..36714b386e7e 100644
--- llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
+++ llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
@@ -1,153 +1,153 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(ptr %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_intrinsic
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_intrinsic
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data)
   ret void
 }
 
 define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_intrinsic
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr)
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY6]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY7]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_intrinsic
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr)
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY6]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY7]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data)
   ret double %ret
 }
 
 define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) #0 {
-  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
 }
 
 define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate(ptr %ptr, double %data) #0 {
-  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
 }
 
 define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) #0 {
-  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY6]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY7]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY6]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY7]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
 }
 
 define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw__noprivate(ptr %ptr, double %data) #0 {
-  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw__noprivate
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY6]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY7]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw__noprivate
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY6]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY7]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
index 707cae953483..2825d9048660 100644
--- llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
+++ llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -26,14 +26,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amd
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -202,14 +202,14 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -388,17 +388,17 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -582,14 +582,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -788,14 +788,14 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -1005,17 +1005,17 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -1228,14 +1228,14 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -1417,14 +1417,14 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -1636,14 +1636,14 @@ define void @flat_agent_atomic_fadd_noret_f32_maybe_remote(ptr %ptr, float %val)
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32_maybe_remote:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32_maybe_remote:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32_maybe_remote:
 ; GFX11:       ; %bb.0:
@@ -1800,14 +1800,14 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1951,14 +1951,14 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -2168,14 +2168,14 @@ define void @flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr %p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -2336,14 +2336,14 @@ define float @flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2512,14 +2512,14 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2698,17 +2698,17 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2892,14 +2892,14 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3098,14 +3098,14 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3315,17 +3315,17 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3538,14 +3538,14 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3727,14 +3727,14 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3947,14 +3947,14 @@ define float @flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memor
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -4136,14 +4136,14 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -4355,14 +4355,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ig
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -4519,14 +4519,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_i
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -4676,14 +4676,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -4840,14 +4840,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory(ptr %ptr,
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -4997,14 +4997,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdg
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -5173,14 +5173,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -5379,14 +5379,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdg
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -5527,14 +5527,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -5717,43 +5717,43 @@ define double @flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB30_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB30_4
-; GFX940-NEXT:  .LBB30_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB30_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB30_2
-; GFX940-NEXT:  .LBB30_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB30_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB30_4
+; GFX942-NEXT:  .LBB30_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB30_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB30_2
+; GFX942-NEXT:  .LBB30_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6115,43 +6115,43 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7f8
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB31_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB31_4
-; GFX940-NEXT:  .LBB31_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB31_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB31_2
-; GFX940-NEXT:  .LBB31_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7f8
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB31_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB31_4
+; GFX942-NEXT:  .LBB31_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB31_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB31_2
+; GFX942-NEXT:  .LBB31_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6536,44 +6536,44 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB32_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB32_4
-; GFX940-NEXT:  .LBB32_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB32_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB32_2
-; GFX940-NEXT:  .LBB32_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB32_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB32_4
+; GFX942-NEXT:  .LBB32_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB32_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB32_2
+; GFX942-NEXT:  .LBB32_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6951,40 +6951,40 @@ define void @flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB33_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB33_4
-; GFX940-NEXT:  .LBB33_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB33_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB33_2
-; GFX940-NEXT:  .LBB33_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB33_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB33_4
+; GFX942-NEXT:  .LBB33_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB33_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB33_2
+; GFX942-NEXT:  .LBB33_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7346,42 +7346,42 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7f8
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB34_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB34_4
-; GFX940-NEXT:  .LBB34_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB34_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB34_2
-; GFX940-NEXT:  .LBB34_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7f8
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB34_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB34_4
+; GFX942-NEXT:  .LBB34_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB34_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB34_2
+; GFX942-NEXT:  .LBB34_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7757,43 +7757,43 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB35_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB35_4
-; GFX940-NEXT:  .LBB35_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB35_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB35_2
-; GFX940-NEXT:  .LBB35_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB35_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB35_4
+; GFX942-NEXT:  .LBB35_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB35_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB35_2
+; GFX942-NEXT:  .LBB35_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8155,38 +8155,38 @@ define half @flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr %
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v4, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB36_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB36_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v4, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB36_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB36_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8445,40 +8445,40 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grain
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    flat_load_dword v4, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB37_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB37_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    flat_load_dword v4, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB37_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB37_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8744,41 +8744,41 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grain
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    flat_load_dword v4, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB38_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB38_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    flat_load_dword v4, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB38_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB38_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9040,37 +9040,37 @@ define void @flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB39_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB39_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB39_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB39_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9319,39 +9319,39 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB40_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB40_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB40_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB40_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9607,40 +9607,40 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB41_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB41_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB41_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB41_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9883,29 +9883,29 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB42_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_f16_e32 v3, v5, v2
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s2, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB42_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB42_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_f16_e32 v3, v5, v2
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s2, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB42_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10099,30 +10099,30 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fi
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB43_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_add_f16_e32 v3, v5, v2
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s2, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB43_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB43_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_add_f16_e32 v3, v5, v2
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s2, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB43_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10335,40 +10335,40 @@ define half @flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grai
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    flat_load_dword v4, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB44_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB44_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    flat_load_dword v4, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB44_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB44_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10635,39 +10635,39 @@ define void @flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB45_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB45_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB45_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB45_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10938,47 +10938,47 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB46_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB46_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB46_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB46_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11283,49 +11283,49 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB47_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB47_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB47_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB47_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11638,50 +11638,50 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB48_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB48_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB48_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB48_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11992,48 +11992,48 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB49_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB49_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB49_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB49_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12336,49 +12336,49 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB50_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB50_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB50_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB50_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12670,40 +12670,40 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB51_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
-; GFX940-NEXT:    v_add_f32_e32 v3, v3, v2
-; GFX940-NEXT:    v_bfe_u32 v4, v3, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v3
-; GFX940-NEXT:    v_add3_u32 v4, v4, v3, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s3, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB51_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB51_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; GFX942-NEXT:    v_add_f32_e32 v3, v3, v2
+; GFX942-NEXT:    v_bfe_u32 v4, v3, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v3
+; GFX942-NEXT:    v_add3_u32 v4, v4, v3, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s3, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB51_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12953,39 +12953,39 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB52_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_add_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_bfe_u32 v5, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v5, v5, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB52_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_add_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_bfe_u32 v5, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v5, v5, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB52_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13240,46 +13240,46 @@ define void @flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB53_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v4, v4, v2
-; GFX940-NEXT:    v_bfe_u32 v7, v4, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v4
-; GFX940-NEXT:    v_add3_u32 v7, v7, v4, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB53_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB53_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v4, v4, v2
+; GFX942-NEXT:    v_bfe_u32 v7, v4, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v4
+; GFX942-NEXT:    v_add3_u32 v7, v7, v4, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB53_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13577,49 +13577,49 @@ define bfloat @flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB54_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB54_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB54_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB54_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13933,48 +13933,48 @@ define void @flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB55_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB55_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB55_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB55_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14242,14 +14242,14 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memo
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14431,14 +14431,14 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14623,17 +14623,17 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14829,14 +14829,14 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15010,14 +15010,14 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2 offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2 offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15198,17 +15198,17 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15402,14 +15402,14 @@ define <2 x half> @flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_f
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15597,14 +15597,14 @@ define void @flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15787,14 +15787,14 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -15976,14 +15976,14 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr %pt
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -16157,14 +16157,14 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memo
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -16346,14 +16346,14 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -16531,14 +16531,14 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_m
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16806,14 +16806,14 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -17084,17 +17084,17 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -17376,14 +17376,14 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory(
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -17643,14 +17643,14 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -17917,17 +17917,17 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -18207,14 +18207,14 @@ define <2 x bfloat> @flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_n
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -18488,14 +18488,14 @@ define void @flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -18764,14 +18764,14 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory(
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -19039,14 +19039,14 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory(ptr %p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -19306,14 +19306,14 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_m
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -19581,14 +19581,14 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_pk_add_bf16 v[0:1], v2
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll
index 5aa9be627594..f2a69b999fe6 100644
--- llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll
+++ llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -26,30 +26,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB0_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB0_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB0_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB0_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -168,30 +168,30 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grai
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB1_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB1_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB1_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB1_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -316,37 +316,37 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grai
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX940-NEXT:  .LBB2_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v1, v2, v2
+; GFX942-NEXT:  .LBB2_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -479,29 +479,29 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB3_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB3_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB3_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB3_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -619,29 +619,29 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB4_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -766,35 +766,35 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB5_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB5_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB5_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB5_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -930,30 +930,30 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB6_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB6_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB6_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB6_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1081,29 +1081,29 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB7_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB7_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB7_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB7_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1230,30 +1230,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB8_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB8_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB8_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1422,30 +1422,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amd
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB9_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB9_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB9_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB9_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1568,30 +1568,30 @@ define float @flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB10_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB10_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB10_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB10_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1710,30 +1710,30 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB11_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB11_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB11_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB11_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1858,37 +1858,37 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX940-NEXT:  .LBB12_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v1, v2, v2
+; GFX942-NEXT:  .LBB12_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2021,29 +2021,29 @@ define void @flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memor
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2161,29 +2161,29 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB14_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB14_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB14_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB14_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2308,35 +2308,35 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB15_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB15_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2472,30 +2472,30 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB16_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB16_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB16_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB16_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2623,29 +2623,29 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB17_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB17_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2822,45 +2822,45 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_4
-; GFX940-NEXT:  .LBB18_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB18_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB18_2
-; GFX940-NEXT:  .LBB18_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_4
+; GFX942-NEXT:  .LBB18_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB18_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB18_2
+; GFX942-NEXT:  .LBB18_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3220,45 +3220,45 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7f8
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_4
-; GFX940-NEXT:  .LBB19_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB19_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB19_2
-; GFX940-NEXT:  .LBB19_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7f8
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_4
+; GFX942-NEXT:  .LBB19_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB19_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB19_2
+; GFX942-NEXT:  .LBB19_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3633,46 +3633,46 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_4
-; GFX940-NEXT:  .LBB20_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB20_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB20_2
-; GFX940-NEXT:  .LBB20_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_4
+; GFX942-NEXT:  .LBB20_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB20_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB20_2
+; GFX942-NEXT:  .LBB20_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4041,42 +4041,42 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_4
-; GFX940-NEXT:  .LBB21_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB21_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB21_2
-; GFX940-NEXT:  .LBB21_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_4
+; GFX942-NEXT:  .LBB21_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB21_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB21_2
+; GFX942-NEXT:  .LBB21_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4431,44 +4431,44 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7f8
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB22_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB22_4
-; GFX940-NEXT:  .LBB22_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB22_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB22_2
-; GFX940-NEXT:  .LBB22_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7f8
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB22_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB22_4
+; GFX942-NEXT:  .LBB22_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB22_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB22_2
+; GFX942-NEXT:  .LBB22_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4837,45 +4837,45 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB23_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB23_4
-; GFX940-NEXT:  .LBB23_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB23_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB23_2
-; GFX940-NEXT:  .LBB23_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB23_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB23_4
+; GFX942-NEXT:  .LBB23_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB23_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB23_2
+; GFX942-NEXT:  .LBB23_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5236,45 +5236,45 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory(ptr %ptr,
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB24_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB24_4
-; GFX940-NEXT:  .LBB24_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB24_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB24_2
-; GFX940-NEXT:  .LBB24_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB24_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB24_4
+; GFX942-NEXT:  .LBB24_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB24_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB24_2
+; GFX942-NEXT:  .LBB24_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -5659,45 +5659,45 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__am
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB25_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB25_4
-; GFX940-NEXT:  .LBB25_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB25_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB25_2
-; GFX940-NEXT:  .LBB25_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB25_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB25_4
+; GFX942-NEXT:  .LBB25_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB25_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB25_2
+; GFX942-NEXT:  .LBB25_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -6039,40 +6039,40 @@ define half @flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr %
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB26_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v5
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB26_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB26_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v5
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB26_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6343,42 +6343,42 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grain
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB27_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v5
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB27_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB27_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v5
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB27_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6656,43 +6656,43 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grain
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB28_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v5
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB28_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB28_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v5
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB28_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6967,39 +6967,39 @@ define void @flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB29_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_max_f16_e32 v4, v4, v4
-; GFX940-NEXT:    v_max_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB29_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB29_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_max_f16_e32 v4, v4, v4
+; GFX942-NEXT:    v_max_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB29_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7262,41 +7262,41 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v6, v2, v2
-; GFX940-NEXT:  .LBB30_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB30_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v6, v2, v2
+; GFX942-NEXT:  .LBB30_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB30_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7566,42 +7566,42 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v6, v2, v2
-; GFX940-NEXT:  .LBB31_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB31_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v6, v2, v2
+; GFX942-NEXT:  .LBB31_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB31_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7859,32 +7859,32 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fi
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB32_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f16_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f16_e32 v3, v3, v2
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s2, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB32_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB32_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f16_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f16_e32 v3, v3, v2
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s2, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB32_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8094,31 +8094,31 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v4, v2, v2
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB33_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f16_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v4
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s2, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB33_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v4, v2, v2
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB33_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f16_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v4
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s2, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB33_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8339,42 +8339,42 @@ define half @flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grai
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB34_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v5
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB34_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB34_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v5
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB34_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8654,41 +8654,41 @@ define void @flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v6, v2, v2
-; GFX940-NEXT:  .LBB35_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB35_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v6, v2, v2
+; GFX942-NEXT:  .LBB35_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB35_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8970,47 +8970,47 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB36_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB36_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB36_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB36_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9316,49 +9316,49 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB37_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB37_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB37_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB37_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9672,50 +9672,50 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB38_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB38_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB38_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB38_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10024,46 +10024,46 @@ define void @flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB39_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v4, v2
-; GFX940-NEXT:    v_bfe_u32 v7, v4, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v4
-; GFX940-NEXT:    v_add3_u32 v7, v7, v4, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB39_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB39_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v4, v2
+; GFX942-NEXT:    v_bfe_u32 v7, v4, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v4
+; GFX942-NEXT:    v_add3_u32 v7, v7, v4, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB39_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10359,48 +10359,48 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB40_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB40_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB40_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB40_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10704,49 +10704,49 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB41_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB41_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB41_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB41_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11039,40 +11039,40 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB42_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
-; GFX940-NEXT:    v_max_f32_e32 v3, v3, v2
-; GFX940-NEXT:    v_bfe_u32 v4, v3, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v3
-; GFX940-NEXT:    v_add3_u32 v4, v4, v3, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s3, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB42_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB42_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; GFX942-NEXT:    v_max_f32_e32 v3, v3, v2
+; GFX942-NEXT:    v_bfe_u32 v4, v3, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v3
+; GFX942-NEXT:    v_add3_u32 v4, v4, v3, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s3, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB42_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11323,39 +11323,39 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB43_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_bfe_u32 v5, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v5, v5, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB43_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB43_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_bfe_u32 v5, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v5, v5, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB43_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11617,49 +11617,49 @@ define bfloat @flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB44_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB44_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB44_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB44_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11974,48 +11974,48 @@ define void @flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB45_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB45_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB45_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB45_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12303,31 +12303,31 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memo
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX940-NEXT:  .LBB46_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_max_f16 v3, v5, v5
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB46_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX942-NEXT:  .LBB46_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB46_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12538,31 +12538,31 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fi
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX940-NEXT:  .LBB47_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_max_f16 v3, v5, v5
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB47_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX942-NEXT:  .LBB47_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB47_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12776,38 +12776,38 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fi
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v1, v2, v2
-; GFX940-NEXT:  .LBB48_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_pk_max_f16 v0, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB48_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v1, v2, v2
+; GFX942-NEXT:  .LBB48_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_pk_max_f16 v0, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v0, v1
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB48_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13031,30 +13031,30 @@ define void @flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v2, v2
-; GFX940-NEXT:  .LBB49_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB49_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v2, v2
+; GFX942-NEXT:  .LBB49_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB49_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13257,30 +13257,30 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v2, v2
-; GFX940-NEXT:  .LBB50_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB50_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v2, v2
+; GFX942-NEXT:  .LBB50_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB50_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13490,36 +13490,36 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v2, v2
-; GFX940-NEXT:  .LBB51_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB51_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v2, v2
+; GFX942-NEXT:  .LBB51_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB51_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13743,31 +13743,31 @@ define <2 x half> @flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_f
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX940-NEXT:  .LBB52_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_max_f16 v3, v5, v5
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB52_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX942-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB52_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13983,30 +13983,30 @@ define void @flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v2, v2
-; GFX940-NEXT:  .LBB53_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB53_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v2, v2
+; GFX942-NEXT:  .LBB53_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB53_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14242,47 +14242,47 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_m
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB54_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_max_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB54_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB54_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_max_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB54_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14588,47 +14588,47 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB55_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_max_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB55_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB55_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_max_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB55_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14937,54 +14937,54 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB56_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v7
-; GFX940-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX940-NEXT:    v_max_f32_e32 v3, v3, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v3, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v3
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v3, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v0, v0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v3, v0, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB56_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB56_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v7
+; GFX942-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_max_f32_e32 v3, v3, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v3, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v3
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v3, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v3, v0, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB56_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15303,46 +15303,46 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory(
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB57_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_max_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB57_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB57_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_max_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB57_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15639,46 +15639,46 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB58_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_max_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB58_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB58_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_max_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB58_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15982,52 +15982,52 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB59_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_max_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB59_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB59_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_max_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB59_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16345,47 +16345,47 @@ define <2 x bfloat> @flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_n
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB60_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_max_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_max_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB60_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB60_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_max_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_max_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB60_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16696,46 +16696,46 @@ define void @flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB61_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_max_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB61_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB61_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_max_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB61_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll
index 065596776cf7..723505af6cc6 100644
--- llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll
+++ llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -26,30 +26,30 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB0_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB0_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB0_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB0_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -168,30 +168,30 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grai
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB1_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB1_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB1_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB1_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -316,37 +316,37 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grai
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX940-NEXT:  .LBB2_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v1, v2, v2
+; GFX942-NEXT:  .LBB2_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -479,29 +479,29 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB3_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB3_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB3_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB3_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -619,29 +619,29 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB4_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -766,35 +766,35 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB5_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB5_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB5_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB5_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -930,30 +930,30 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB6_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB6_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB6_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB6_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1081,29 +1081,29 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB7_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB7_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB7_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB7_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1230,30 +1230,30 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB8_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB8_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB8_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1422,30 +1422,30 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amd
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB9_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB9_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB9_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB9_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1568,30 +1568,30 @@ define float @flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB10_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB10_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB10_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB10_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1710,30 +1710,30 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB11_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB11_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB11_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB11_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1858,37 +1858,37 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX940-NEXT:  .LBB12_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_max_f32_e32 v0, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v1, v2, v2
+; GFX942-NEXT:  .LBB12_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_max_f32_e32 v0, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2021,29 +2021,29 @@ define void @flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memor
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2161,29 +2161,29 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB14_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB14_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB14_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB14_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2308,35 +2308,35 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB15_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB15_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2472,30 +2472,30 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB16_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB16_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB16_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB16_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2623,29 +2623,29 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB17_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB17_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2822,45 +2822,45 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_4
-; GFX940-NEXT:  .LBB18_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB18_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB18_2
-; GFX940-NEXT:  .LBB18_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_4
+; GFX942-NEXT:  .LBB18_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB18_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB18_2
+; GFX942-NEXT:  .LBB18_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3220,45 +3220,45 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7f8
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_4
-; GFX940-NEXT:  .LBB19_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB19_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB19_2
-; GFX940-NEXT:  .LBB19_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7f8
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_4
+; GFX942-NEXT:  .LBB19_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB19_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB19_2
+; GFX942-NEXT:  .LBB19_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3633,46 +3633,46 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_4
-; GFX940-NEXT:  .LBB20_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB20_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB20_2
-; GFX940-NEXT:  .LBB20_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_4
+; GFX942-NEXT:  .LBB20_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB20_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB20_2
+; GFX942-NEXT:  .LBB20_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4041,42 +4041,42 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_4
-; GFX940-NEXT:  .LBB21_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB21_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB21_2
-; GFX940-NEXT:  .LBB21_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX940-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_4
+; GFX942-NEXT:  .LBB21_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB21_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB21_2
+; GFX942-NEXT:  .LBB21_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX942-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4431,44 +4431,44 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7f8
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB22_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB22_4
-; GFX940-NEXT:  .LBB22_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB22_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB22_2
-; GFX940-NEXT:  .LBB22_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX940-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7f8
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB22_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB22_4
+; GFX942-NEXT:  .LBB22_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB22_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB22_2
+; GFX942-NEXT:  .LBB22_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX942-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4837,45 +4837,45 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB23_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB23_4
-; GFX940-NEXT:  .LBB23_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB23_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB23_2
-; GFX940-NEXT:  .LBB23_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX940-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB23_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB23_4
+; GFX942-NEXT:  .LBB23_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB23_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB23_2
+; GFX942-NEXT:  .LBB23_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX942-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5236,45 +5236,45 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory(ptr %ptr,
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB24_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB24_4
-; GFX940-NEXT:  .LBB24_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB24_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB24_2
-; GFX940-NEXT:  .LBB24_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB24_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB24_4
+; GFX942-NEXT:  .LBB24_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB24_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB24_2
+; GFX942-NEXT:  .LBB24_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -5659,45 +5659,45 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__am
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB25_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB25_4
-; GFX940-NEXT:  .LBB25_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB25_3: ; %atomicrmw.global
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB25_2
-; GFX940-NEXT:  .LBB25_4: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
-; GFX940-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB25_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB25_4
+; GFX942-NEXT:  .LBB25_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB25_3: ; %atomicrmw.global
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB25_2
+; GFX942-NEXT:  .LBB25_4: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
+; GFX942-NEXT:    v_min_f64 v[2:3], v[4:5], v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -6039,40 +6039,40 @@ define half @flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr %
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB26_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v5
-; GFX940-NEXT:    v_min_f16_e32 v5, v5, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB26_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB26_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v5
+; GFX942-NEXT:    v_min_f16_e32 v5, v5, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB26_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6343,42 +6343,42 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grain
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB27_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v5
-; GFX940-NEXT:    v_min_f16_e32 v5, v5, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB27_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB27_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v5
+; GFX942-NEXT:    v_min_f16_e32 v5, v5, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB27_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6656,43 +6656,43 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grain
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB28_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v5
-; GFX940-NEXT:    v_min_f16_e32 v5, v5, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB28_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB28_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v5
+; GFX942-NEXT:    v_min_f16_e32 v5, v5, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB28_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -6967,39 +6967,39 @@ define void @flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB29_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_max_f16_e32 v4, v4, v4
-; GFX940-NEXT:    v_min_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB29_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB29_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_max_f16_e32 v4, v4, v4
+; GFX942-NEXT:    v_min_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB29_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7262,41 +7262,41 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v6, v2, v2
-; GFX940-NEXT:  .LBB30_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:    v_min_f16_e32 v2, v2, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB30_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v6, v2, v2
+; GFX942-NEXT:  .LBB30_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:    v_min_f16_e32 v2, v2, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB30_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7566,42 +7566,42 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v6, v2, v2
-; GFX940-NEXT:  .LBB31_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:    v_min_f16_e32 v2, v2, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB31_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v6, v2, v2
+; GFX942-NEXT:  .LBB31_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:    v_min_f16_e32 v2, v2, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB31_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7859,32 +7859,32 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fi
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB32_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f16_e32 v3, v5, v5
-; GFX940-NEXT:    v_min_f16_e32 v3, v3, v2
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s2, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB32_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB32_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f16_e32 v3, v5, v5
+; GFX942-NEXT:    v_min_f16_e32 v3, v3, v2
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s2, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB32_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8094,31 +8094,31 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v4, v2, v2
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB33_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f16_e32 v2, v3, v3
-; GFX940-NEXT:    v_min_f16_e32 v2, v2, v4
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s2, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB33_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v4, v2, v2
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB33_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f16_e32 v2, v3, v3
+; GFX942-NEXT:    v_min_f16_e32 v2, v2, v4
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s2, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB33_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8339,42 +8339,42 @@ define half @flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grai
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB34_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
-; GFX940-NEXT:    v_max_f16_e32 v5, v5, v5
-; GFX940-NEXT:    v_min_f16_e32 v5, v5, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB34_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB34_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, v3, v7
+; GFX942-NEXT:    v_max_f16_e32 v5, v5, v5
+; GFX942-NEXT:    v_min_f16_e32 v5, v5, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v5, v3, v5
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB34_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8654,41 +8654,41 @@ define void @flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f16_e32 v6, v2, v2
-; GFX940-NEXT:  .LBB35_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
-; GFX940-NEXT:    v_max_f16_e32 v2, v2, v2
-; GFX940-NEXT:    v_min_f16_e32 v2, v2, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB35_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f16_e32 v6, v2, v2
+; GFX942-NEXT:  .LBB35_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, v4, v3
+; GFX942-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX942-NEXT:    v_min_f16_e32 v2, v2, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB35_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8970,47 +8970,47 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB36_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB36_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB36_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB36_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9316,49 +9316,49 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB37_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB37_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB37_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB37_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9672,50 +9672,50 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB38_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB38_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB38_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB38_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10024,46 +10024,46 @@ define void @flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB39_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_min_f32_e32 v4, v4, v2
-; GFX940-NEXT:    v_bfe_u32 v7, v4, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v4
-; GFX940-NEXT:    v_add3_u32 v7, v7, v4, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB39_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB39_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_min_f32_e32 v4, v4, v2
+; GFX942-NEXT:    v_bfe_u32 v7, v4, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v4
+; GFX942-NEXT:    v_add3_u32 v7, v7, v4, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB39_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10359,48 +10359,48 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB40_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB40_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB40_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB40_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10704,49 +10704,49 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB41_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB41_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB41_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB41_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11039,40 +11039,40 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB42_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
-; GFX940-NEXT:    v_min_f32_e32 v3, v3, v2
-; GFX940-NEXT:    v_bfe_u32 v4, v3, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v3
-; GFX940-NEXT:    v_add3_u32 v4, v4, v3, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s3, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB42_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB42_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; GFX942-NEXT:    v_min_f32_e32 v3, v3, v2
+; GFX942-NEXT:    v_bfe_u32 v4, v3, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v3
+; GFX942-NEXT:    v_add3_u32 v4, v4, v3, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s3, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB42_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11323,39 +11323,39 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB43_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_bfe_u32 v5, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v5, v5, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB43_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB43_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_bfe_u32 v5, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v5, v5, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB43_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11617,49 +11617,49 @@ define bfloat @flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB44_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB44_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB44_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB44_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11974,48 +11974,48 @@ define void @flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB45_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB45_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB45_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB45_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12303,31 +12303,31 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memo
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX940-NEXT:  .LBB46_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_max_f16 v3, v5, v5
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_min_f16 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB46_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX942-NEXT:  .LBB46_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_min_f16 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB46_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12538,31 +12538,31 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fi
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX940-NEXT:  .LBB47_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_max_f16 v3, v5, v5
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_min_f16 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB47_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX942-NEXT:  .LBB47_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_min_f16 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB47_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12776,38 +12776,38 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fi
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v1, v2, v2
-; GFX940-NEXT:  .LBB48_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_pk_max_f16 v0, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB48_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v1, v2, v2
+; GFX942-NEXT:  .LBB48_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_pk_max_f16 v0, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_min_f16 v2, v0, v1
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB48_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13031,30 +13031,30 @@ define void @flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v2, v2
-; GFX940-NEXT:  .LBB49_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB49_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v2, v2
+; GFX942-NEXT:  .LBB49_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB49_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13257,30 +13257,30 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v2, v2
-; GFX940-NEXT:  .LBB50_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB50_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v2, v2
+; GFX942-NEXT:  .LBB50_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB50_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13490,36 +13490,36 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v2, v2
-; GFX940-NEXT:  .LBB51_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB51_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v2, v2
+; GFX942-NEXT:  .LBB51_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB51_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13743,31 +13743,31 @@ define <2 x half> @flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_f
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX940-NEXT:  .LBB52_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_max_f16 v3, v5, v5
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_min_f16 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB52_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX942-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_min_f16 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB52_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13983,30 +13983,30 @@ define void @flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_pk_max_f16 v4, v2, v2
-; GFX940-NEXT:  .LBB53_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB53_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_pk_max_f16 v4, v2, v2
+; GFX942-NEXT:  .LBB53_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB53_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14242,47 +14242,47 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_m
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB54_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_min_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB54_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB54_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_min_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB54_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14588,47 +14588,47 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB55_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_min_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB55_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB55_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_min_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB55_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14937,54 +14937,54 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB56_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v7
-; GFX940-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX940-NEXT:    v_min_f32_e32 v3, v3, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v3, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v3
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v3, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v0, v0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v3, v0, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB56_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB56_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v7
+; GFX942-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_min_f32_e32 v3, v3, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v3, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v3
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v3, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v3, v0, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB56_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15303,46 +15303,46 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory(
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB57_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_min_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB57_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB57_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_min_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB57_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15639,46 +15639,46 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB58_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_min_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB58_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB58_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_min_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB58_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15982,52 +15982,52 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB59_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_min_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB59_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB59_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_min_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB59_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16345,47 +16345,47 @@ define <2 x bfloat> @flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_n
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB60_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_min_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_min_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB60_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB60_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_min_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_min_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB60_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16696,46 +16696,46 @@ define void @flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB61_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_min_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB61_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB61_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_min_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB61_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll
index cd1a16134666..35771e49f387 100644
--- llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll
+++ llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -43,28 +43,28 @@ define float @flat_agent_atomic_fsub_ret_f32(ptr %ptr, float %val) #0 {
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB0_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB0_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB0_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB0_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32:
 ; GFX11:       ; %bb.0:
@@ -238,28 +238,28 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %val
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB1_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB1_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB1_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB1_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -437,35 +437,35 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg(ptr %ptr, float %val
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB2_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_sub_f32_e32 v0, v1, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB2_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_sub_f32_e32 v0, v1, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -652,27 +652,27 @@ define void @flat_agent_atomic_fsub_noret_f32(ptr %ptr, float %val) #0 {
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB3_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB3_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB3_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB3_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32:
 ; GFX11:       ; %bb.0:
@@ -837,27 +837,27 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %va
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB4_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -1029,33 +1029,33 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg(ptr %ptr, float %va
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v5, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB5_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB5_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v5, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB5_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB5_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -1242,28 +1242,28 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %va
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB6_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB6_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB6_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB6_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -1442,27 +1442,27 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %v
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB7_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB7_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB7_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB7_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -1642,28 +1642,28 @@ define float @flat_agent_atomic_fsub_ret_f32__ftz(ptr %ptr, float %val) #1 {
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__ftz:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB8_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB8_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__ftz:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB8_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__ftz:
 ; GFX11:       ; %bb.0:
@@ -1837,28 +1837,28 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, float
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB9_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB9_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB9_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB9_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz:
 ; GFX11:       ; %bb.0:
@@ -2036,35 +2036,35 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr %ptr, float
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB10_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_sub_f32_e32 v0, v1, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB10_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB10_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_sub_f32_e32 v0, v1, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB10_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz:
 ; GFX11:       ; %bb.0:
@@ -2251,27 +2251,27 @@ define void @flat_agent_atomic_fsub_noret_f32__ftz(ptr %ptr, float %val) #1 {
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__ftz:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB11_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB11_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__ftz:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB11_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB11_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__ftz:
 ; GFX11:       ; %bb.0:
@@ -2436,27 +2436,27 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, floa
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB12_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB12_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz:
 ; GFX11:       ; %bb.0:
@@ -2628,33 +2628,33 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr %ptr, floa
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v5, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v5, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz:
 ; GFX11:       ; %bb.0:
@@ -2841,28 +2841,28 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, floa
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB14_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB14_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB14_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB14_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz:
 ; GFX11:       ; %bb.0:
@@ -3041,27 +3041,27 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, flo
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB15_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f32_e32 v4, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB15_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f32_e32 v4, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz:
 ; GFX11:       ; %bb.0:
@@ -3266,52 +3266,52 @@ define double @flat_agent_atomic_fsub_ret_f64(ptr %ptr, double %val) #0 {
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f64:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB16_4
-; GFX940-NEXT:  ; %bb.1: ; %atomicrmw.global
-; GFX940-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:  .LBB16_2: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
-; GFX940-NEXT:    v_add_f64 v[4:5], v[6:7], -v[2:3]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB16_2
-; GFX940-NEXT:  ; %bb.3: ; %Flow
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:  .LBB16_4: ; %Flow3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB16_6
-; GFX940-NEXT:  ; %bb.5: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[4:5], v6, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[0:1], v[4:5], -v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v6, v[0:1], off sc0 sc1
-; GFX940-NEXT:  .LBB16_6: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB16_4
+; GFX942-NEXT:  ; %bb.1: ; %atomicrmw.global
+; GFX942-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:  .LBB16_2: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
+; GFX942-NEXT:    v_add_f64 v[4:5], v[6:7], -v[2:3]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB16_2
+; GFX942-NEXT:  ; %bb.3: ; %Flow
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:  .LBB16_4: ; %Flow3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB16_6
+; GFX942-NEXT:  ; %bb.5: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[4:5], v6, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[0:1], v[4:5], -v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v6, v[0:1], off sc0 sc1
+; GFX942-NEXT:  .LBB16_6: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f64:
 ; GFX11:       ; %bb.0:
@@ -3682,56 +3682,56 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7f8
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_6
-; GFX940-NEXT:  .LBB17_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB17_3: ; %atomicrmw.global
-; GFX940-NEXT:    flat_load_dwordx2 v[0:1], v[4:5]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:  .LBB17_4: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[8:9], v[0:1]
-; GFX940-NEXT:    v_add_f64 v[6:7], v[8:9], -v[2:3]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_4
-; GFX940-NEXT:  ; %bb.5: ; %Flow
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB17_2
-; GFX940-NEXT:  .LBB17_6: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[2:3], v[0:1], -v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7f8
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_6
+; GFX942-NEXT:  .LBB17_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB17_3: ; %atomicrmw.global
+; GFX942-NEXT:    flat_load_dwordx2 v[0:1], v[4:5]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:  .LBB17_4: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[8:9], v[0:1]
+; GFX942-NEXT:    v_add_f64 v[6:7], v[8:9], -v[2:3]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_4
+; GFX942-NEXT:  ; %bb.5: ; %Flow
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB17_2
+; GFX942-NEXT:  .LBB17_6: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[2:3], v[0:1], -v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -4129,57 +4129,57 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_6
-; GFX940-NEXT:  .LBB18_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB18_3: ; %atomicrmw.global
-; GFX940-NEXT:    flat_load_dwordx2 v[0:1], v[4:5]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:  .LBB18_4: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[8:9], v[0:1]
-; GFX940-NEXT:    v_add_f64 v[6:7], v[8:9], -v[2:3]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB18_4
-; GFX940-NEXT:  ; %bb.5: ; %Flow
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB18_2
-; GFX940-NEXT:  .LBB18_6: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[2:3], v[0:1], -v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v5
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_6
+; GFX942-NEXT:  .LBB18_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB18_3: ; %atomicrmw.global
+; GFX942-NEXT:    flat_load_dwordx2 v[0:1], v[4:5]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:  .LBB18_4: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[8:9], v[0:1]
+; GFX942-NEXT:    v_add_f64 v[6:7], v[8:9], -v[2:3]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB18_4
+; GFX942-NEXT:  ; %bb.5: ; %Flow
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB18_2
+; GFX942-NEXT:  .LBB18_6: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[2:3], v[0:1], -v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -4570,53 +4570,53 @@ define void @flat_agent_atomic_fsub_noret_f64(ptr %ptr, double %val) #0 {
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f64:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_6
-; GFX940-NEXT:  .LBB19_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB19_3: ; %atomicrmw.global
-; GFX940-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:  .LBB19_4: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[4:5], v[6:7], -v[2:3]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB19_4
-; GFX940-NEXT:  ; %bb.5: ; %Flow
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB19_2
-; GFX940-NEXT:  .LBB19_6: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f64:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_6
+; GFX942-NEXT:  .LBB19_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB19_3: ; %atomicrmw.global
+; GFX942-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:  .LBB19_4: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[4:5], v[6:7], -v[2:3]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB19_4
+; GFX942-NEXT:  ; %bb.5: ; %Flow
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB19_2
+; GFX942-NEXT:  .LBB19_6: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f64:
 ; GFX11:       ; %bb.0:
@@ -4991,55 +4991,55 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7f8
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_6
-; GFX940-NEXT:  .LBB20_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB20_3: ; %atomicrmw.global
-; GFX940-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:  .LBB20_4: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[4:5], v[6:7], -v[2:3]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB20_4
-; GFX940-NEXT:  ; %bb.5: ; %Flow
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB20_2
-; GFX940-NEXT:  .LBB20_6: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7f8
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_6
+; GFX942-NEXT:  .LBB20_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB20_3: ; %atomicrmw.global
+; GFX942-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:  .LBB20_4: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[4:5], v[6:7], -v[2:3]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB20_4
+; GFX942-NEXT:  ; %bb.5: ; %Flow
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB20_2
+; GFX942-NEXT:  .LBB20_6: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -5428,56 +5428,56 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX940-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
-; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_3
-; GFX940-NEXT:  ; %bb.1: ; %Flow3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_6
-; GFX940-NEXT:  .LBB21_2: ; %atomicrmw.phi
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
-; GFX940-NEXT:  .LBB21_3: ; %atomicrmw.global
-; GFX940-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:  .LBB21_4: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[4:5], v[6:7], -v[2:3]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB21_4
-; GFX940-NEXT:  ; %bb.5: ; %Flow
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GFX940-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GFX940-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB21_2
-; GFX940-NEXT:  .LBB21_6: ; %atomicrmw.private
-; GFX940-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
-; GFX940-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX942-NEXT:    v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX942-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_3
+; GFX942-NEXT:  ; %bb.1: ; %Flow3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_6
+; GFX942-NEXT:  .LBB21_2: ; %atomicrmw.phi
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-NEXT:  .LBB21_3: ; %atomicrmw.global
+; GFX942-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:  .LBB21_4: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[4:5], v[6:7], -v[2:3]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB21_4
+; GFX942-NEXT:  ; %bb.5: ; %Flow
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX942-NEXT:    s_cbranch_execz .LBB21_2
+; GFX942-NEXT:  .LBB21_6: ; %atomicrmw.private
+; GFX942-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, -1, v0, vcc
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v4, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX942-NEXT:    scratch_store_dwordx2 v4, v[0:1], off sc0 sc1
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -5852,38 +5852,38 @@ define half @flat_agent_atomic_fsub_ret_f16(ptr %ptr, half %val) #0 {
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v4, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB22_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_sub_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB22_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v4, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB22_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_sub_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB22_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f16:
 ; GFX11:       ; %bb.0:
@@ -6142,40 +6142,40 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    flat_load_dword v4, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB23_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_sub_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB23_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    flat_load_dword v4, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB23_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_sub_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB23_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -6441,41 +6441,41 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_neg(ptr %ptr, half %val)
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    flat_load_dword v4, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB24_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_sub_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB24_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    flat_load_dword v4, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB24_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_sub_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB24_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -6737,37 +6737,37 @@ define void @flat_agent_atomic_fsub_noret_f16(ptr %ptr, half %val) #0 {
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB25_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_sub_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB25_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB25_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_sub_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB25_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f16:
 ; GFX11:       ; %bb.0:
@@ -7016,39 +7016,39 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %val
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB26_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_sub_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB26_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB26_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_sub_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB26_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -7304,40 +7304,40 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_neg(ptr %ptr, half %val
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB27_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_sub_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB27_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB27_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_sub_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB27_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -7582,30 +7582,30 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr %ptr, hal
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB28_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_sub_f16_e32 v3, v5, v2
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s2, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB28_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB28_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_sub_f16_e32 v3, v5, v2
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s2, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB28_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4:
 ; GFX11:       ; %bb.0:
@@ -7802,29 +7802,29 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr %ptr, h
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB29_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_sub_f16_e32 v3, v5, v2
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s2, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB29_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB29_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_sub_f16_e32 v3, v5, v2
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s2, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB29_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos:
 ; GFX11:       ; %bb.0:
@@ -8032,40 +8032,40 @@ define half @flat_system_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_ret_f16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    flat_load_dword v4, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB30_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_sub_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB30_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_ret_f16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    flat_load_dword v4, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB30_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_sub_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB30_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_ret_f16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -8332,39 +8332,39 @@ define void @flat_system_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %va
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_noret_f16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB31_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_sub_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB31_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_noret_f16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB31_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_sub_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB31_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_noret_f16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -8635,47 +8635,47 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16(ptr %ptr, bfloat %val) #0 {
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_bf16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB32_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_sub_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB32_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB32_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_sub_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB32_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_bf16:
 ; GFX11:       ; %bb.0:
@@ -8980,49 +8980,49 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat %
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB33_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_sub_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB33_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB33_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_sub_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB33_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -9335,50 +9335,50 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr %ptr, bfloat %
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB34_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_sub_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB34_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB34_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_sub_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB34_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -9686,46 +9686,46 @@ define void @flat_agent_atomic_fsub_noret_bf16(ptr %ptr, bfloat %val) #0 {
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_bf16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB35_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_sub_f32_e32 v4, v4, v2
-; GFX940-NEXT:    v_bfe_u32 v7, v4, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v4
-; GFX940-NEXT:    v_add3_u32 v7, v7, v4, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB35_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB35_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_sub_f32_e32 v4, v4, v2
+; GFX942-NEXT:    v_bfe_u32 v7, v4, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v4
+; GFX942-NEXT:    v_add3_u32 v7, v7, v4, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB35_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_bf16:
 ; GFX11:       ; %bb.0:
@@ -10020,48 +10020,48 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat %
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB36_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_sub_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB36_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB36_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_sub_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB36_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -10364,49 +10364,49 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr %ptr, bfloat %
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB37_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_sub_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB37_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB37_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_sub_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB37_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -10698,40 +10698,40 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr %ptr,
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB38_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
-; GFX940-NEXT:    v_sub_f32_e32 v3, v3, v2
-; GFX940-NEXT:    v_bfe_u32 v4, v3, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v3
-; GFX940-NEXT:    v_add3_u32 v4, v4, v3, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s3, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB38_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB38_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; GFX942-NEXT:    v_sub_f32_e32 v3, v3, v2
+; GFX942-NEXT:    v_bfe_u32 v4, v3, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v3
+; GFX942-NEXT:    v_add3_u32 v4, v4, v3, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s3, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB38_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4:
 ; GFX11:       ; %bb.0:
@@ -10981,39 +10981,39 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr %ptr,
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB39_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_sub_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_bfe_u32 v5, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v5, v5, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB39_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB39_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_sub_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_bfe_u32 v5, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v5, v5, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB39_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos:
 ; GFX11:       ; %bb.0:
@@ -11274,49 +11274,49 @@ define bfloat @flat_system_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_ret_bf16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB40_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_sub_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB40_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_ret_bf16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB40_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_sub_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB40_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_ret_bf16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -11630,48 +11630,48 @@ define void @flat_system_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_noret_bf16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB41_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_sub_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB41_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_noret_bf16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB41_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_sub_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB41_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_noret_bf16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -11956,28 +11956,28 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16(ptr %ptr, <2 x half> %val) #
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB42_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB42_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB42_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB42_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2f16:
 ; GFX11:       ; %bb.0:
@@ -12176,28 +12176,28 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB43_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB43_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB43_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB43_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -12399,35 +12399,35 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr %ptr, <2
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB44_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_pk_add_f16 v0, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB44_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB44_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_pk_add_f16 v0, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB44_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -12638,27 +12638,27 @@ define void @flat_agent_atomic_fsub_noret_v2f16(ptr %ptr, <2 x half> %val) #0 {
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB45_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB45_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB45_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB45_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2f16:
 ; GFX11:       ; %bb.0:
@@ -12847,27 +12847,27 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x ha
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB46_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB46_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB46_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB46_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -13063,33 +13063,33 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr %ptr, <2 x ha
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v5, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB47_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB47_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v5, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB47_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB47_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -13300,28 +13300,28 @@ define <2 x half> @flat_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_ret_v2f16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB48_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB48_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_ret_v2f16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB48_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB48_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_ret_v2f16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -13524,27 +13524,27 @@ define void @flat_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x h
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_noret_v2f16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v5, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB49_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB49_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_noret_v2f16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v5, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB49_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB49_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_noret_v2f16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -13769,47 +13769,47 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16(ptr %ptr, <2 x bfloat> %v
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2bf16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB50_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_sub_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_sub_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB50_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB50_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_sub_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_sub_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB50_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2bf16:
 ; GFX11:       ; %bb.0:
@@ -14115,47 +14115,47 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB51_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_sub_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_sub_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB51_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB51_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_sub_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_sub_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB51_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -14464,54 +14464,54 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr %ptr,
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v5, v1
-; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
-; GFX940-NEXT:    flat_load_dword v0, v[0:1]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB52_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v3, 0xffff0000, v7
-; GFX940-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX940-NEXT:    v_sub_f32_e32 v3, v3, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v3, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v3
-; GFX940-NEXT:    v_add3_u32 v6, v6, v0, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v3, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v0, v0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v0, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v3, v0, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB52_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v4, v0
+; GFX942-NEXT:    v_mov_b32_e32 v5, v1
+; GFX942-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v4
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v5, vcc
+; GFX942-NEXT:    flat_load_dword v0, v[0:1]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v3, 0xffff0000, v7
+; GFX942-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX942-NEXT:    v_sub_f32_e32 v3, v3, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v0, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v3, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v0
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v3
+; GFX942-NEXT:    v_add3_u32 v6, v6, v0, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v3, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v0, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v3, v0, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB52_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -14830,46 +14830,46 @@ define void @flat_agent_atomic_fsub_noret_v2bf16(ptr %ptr, <2 x bfloat> %val) #0
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2bf16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB53_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_sub_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_sub_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB53_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB53_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_sub_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_sub_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB53_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2bf16:
 ; GFX11:       ; %bb.0:
@@ -15166,46 +15166,46 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x b
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB54_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_sub_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_sub_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB54_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB54_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_sub_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_sub_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB54_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -15509,52 +15509,52 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
-; GFX940-NEXT:    flat_load_dword v3, v[4:5]
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB55_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_sub_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_sub_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB55_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
+; GFX942-NEXT:    flat_load_dword v3, v[4:5]
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB55_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_sub_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_sub_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB55_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg:
 ; GFX11:       ; %bb.0:
@@ -15872,47 +15872,47 @@ define <2 x bfloat> @flat_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_ret_v2bf16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB56_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
-; GFX940-NEXT:    v_sub_f32_e32 v3, v3, v4
-; GFX940-NEXT:    v_sub_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v3, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v3
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v3, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v5, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v6, v5, v3, s5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB56_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_ret_v2bf16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB56_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v7
+; GFX942-NEXT:    v_sub_f32_e32 v3, v3, v4
+; GFX942-NEXT:    v_sub_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v3, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v3
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v3, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v5, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v3, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v3, v6, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v6, v5, v3, s5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB56_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_ret_v2bf16__offset12b_pos:
 ; GFX11:       ; %bb.0:
@@ -16223,46 +16223,46 @@ define void @flat_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_system_atomic_fsub_noret_v2bf16__offset12b_pos:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    flat_load_dword v3, v[0:1] offset:2044
-; GFX940-NEXT:    s_mov_b64 s[2:3], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX940-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
-; GFX940-NEXT:    s_mov_b32 s5, 0x7060302
-; GFX940-NEXT:  .LBB57_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
-; GFX940-NEXT:    v_sub_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_sub_f32_e32 v6, v6, v5
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_bfe_u32 v9, v6, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_or_b32_e32 v10, 0x400000, v6
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s4
-; GFX940-NEXT:    v_add3_u32 v9, v9, v6, s4
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
-; GFX940-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
-; GFX940-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
-; GFX940-NEXT:    v_perm_b32 v2, v6, v2, s5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_cbranch_execnz .LBB57_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_system_atomic_fsub_noret_v2bf16__offset12b_pos:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    flat_load_dword v3, v[0:1] offset:2044
+; GFX942-NEXT:    s_mov_b64 s[2:3], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX942-NEXT:    v_and_b32_e32 v5, 0xffff0000, v2
+; GFX942-NEXT:    s_mov_b32 s5, 0x7060302
+; GFX942-NEXT:  .LBB57_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0xffff0000, v3
+; GFX942-NEXT:    v_sub_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_sub_f32_e32 v6, v6, v5
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_bfe_u32 v9, v6, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_or_b32_e32 v10, 0x400000, v6
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s4
+; GFX942-NEXT:    v_add3_u32 v9, v9, v6, s4
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
+; GFX942-NEXT:    v_cmp_u_f32_e64 s[0:1], v2, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v9, v10, vcc
+; GFX942-NEXT:    v_cndmask_b32_e64 v2, v7, v8, s[0:1]
+; GFX942-NEXT:    v_perm_b32 v2, v6, v2, s5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_cbranch_execnz .LBB57_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[2:3]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: flat_system_atomic_fsub_noret_v2bf16__offset12b_pos:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index ef3657433e8b..346b69c362c0 100644
--- llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GFX942-SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GFX942-GISEL
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12-SDAG
@@ -12,46 +12,46 @@
 declare i32 @llvm.amdgcn.workitem.id.x()
 
 define amdgpu_kernel void @soff1_voff1(i32 %soff) {
-; GFX940-SDAG-LABEL: soff1_voff1:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
-; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
-; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff1_voff1:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
+; GFX942-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff1_voff1:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff1_voff1:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff1_voff1:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -140,48 +140,48 @@ bb:
 }
 
 define amdgpu_kernel void @soff1_voff2(i32 %soff) {
-; GFX940-SDAG-LABEL: soff1_voff2:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
-; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
-; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff1_voff2:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
+; GFX942-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff1_voff2:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff1_voff2:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff1_voff2:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -276,48 +276,48 @@ bb:
 }
 
 define amdgpu_kernel void @soff1_voff4(i32 %soff) {
-; GFX940-SDAG-LABEL: soff1_voff4:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
-; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
-; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff1_voff4:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
+; GFX942-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff1_voff4:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff1_voff4:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff1_voff4:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -412,48 +412,48 @@ bb:
 }
 
 define amdgpu_kernel void @soff2_voff1(i32 %soff) {
-; GFX940-SDAG-LABEL: soff2_voff1:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
-; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
-; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff2_voff1:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
+; GFX942-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff2_voff1:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff2_voff1:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff2_voff1:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -547,49 +547,49 @@ bb:
 }
 
 define amdgpu_kernel void @soff2_voff2(i32 %soff) {
-; GFX940-SDAG-LABEL: soff2_voff2:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff2_voff2:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff2_voff2:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff2_voff2:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff2_voff2:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -688,49 +688,49 @@ bb:
 }
 
 define amdgpu_kernel void @soff2_voff4(i32 %soff) {
-; GFX940-SDAG-LABEL: soff2_voff4:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff2_voff4:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff2_voff4:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff2_voff4:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff2_voff4:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -829,48 +829,48 @@ bb:
 }
 
 define amdgpu_kernel void @soff4_voff1(i32 %soff) {
-; GFX940-SDAG-LABEL: soff4_voff1:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
-; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
-; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff4_voff1:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
+; GFX942-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff4_voff1:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff4_voff1:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff4_voff1:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -964,49 +964,49 @@ bb:
 }
 
 define amdgpu_kernel void @soff4_voff2(i32 %soff) {
-; GFX940-SDAG-LABEL: soff4_voff2:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff4_voff2:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff4_voff2:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff4_voff2:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff4_voff2:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -1105,48 +1105,48 @@ bb:
 }
 
 define amdgpu_kernel void @soff4_voff4(i32 %soff) {
-; GFX940-SDAG-LABEL: soff4_voff4:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v3, s0
-; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v3
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, off offset:2 sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff4_voff4:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v3, s0
+; GFX942-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v3
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v2, off offset:2 sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff4_voff4:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
-; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff4_voff4:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v2, 2
+; GFX942-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff4_voff4:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
@@ -1245,29 +1245,29 @@ bb:
 }
 
 define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
-; GFX940-SDAG-LABEL: soff1_voff1_negative:
-; GFX940-SDAG:       ; %bb.0: ; %bb
-; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, -1, v0
-; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-SDAG-NEXT:    s_endpgm
+; GFX942-SDAG-LABEL: soff1_voff1_negative:
+; GFX942-SDAG:       ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, -1, v0
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT:    s_endpgm
 ;
-; GFX940-GISEL-LABEL: soff1_voff1_negative:
-; GFX940-GISEL:       ; %bb.0: ; %bb
-; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
-; GFX940-GISEL-NEXT:    v_add3_u32 v0, s0, v0, -1
-; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-GISEL-NEXT:    s_endpgm
+; GFX942-GISEL-LABEL: soff1_voff1_negative:
+; GFX942-GISEL:       ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT:    s_add_u32 s0, 0, s0
+; GFX942-GISEL-NEXT:    v_add3_u32 v0, s0, v0, -1
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-GISEL-NEXT:    s_endpgm
 ;
 ; GFX11-SDAG-LABEL: soff1_voff1_negative:
 ; GFX11-SDAG:       ; %bb.0: ; %bb
diff --git llvm/test/CodeGen/AMDGPU/flat-scratch.ll llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 5415af02ef89..d44422e3ee35 100644
--- llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -4,7 +4,7 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX12 %s
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX9-PAL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -mattr=-promote-alloca < %s | FileCheck -check-prefixes=GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -mattr=-promote-alloca < %s | FileCheck -check-prefixes=GFX942 %s
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1010-PAL %s
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1030-PAL %s
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX11-PAL %s
@@ -104,19 +104,19 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: zero_init_kernel:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_mov_b32 s0, 0
-; GFX940-NEXT:    s_mov_b32 s1, s0
-; GFX940-NEXT:    s_mov_b32 s2, s0
-; GFX940-NEXT:    s_mov_b32 s3, s0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:48 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:32 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:16 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: zero_init_kernel:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_mov_b32 s0, 0
+; GFX942-NEXT:    s_mov_b32 s1, s0
+; GFX942-NEXT:    s_mov_b32 s2, s0
+; GFX942-NEXT:    s_mov_b32 s3, s0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:48 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:32 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:16 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: zero_init_kernel:
 ; GFX1010-PAL:       ; %bb.0:
@@ -297,21 +297,21 @@ define void @zero_init_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: zero_init_foo:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b32 s0, 0
-; GFX940-NEXT:    s_mov_b32 s1, s0
-; GFX940-NEXT:    s_mov_b32 s2, s0
-; GFX940-NEXT:    s_mov_b32 s3, s0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:48 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:32 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:16 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: zero_init_foo:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b32 s0, 0
+; GFX942-NEXT:    s_mov_b32 s1, s0
+; GFX942-NEXT:    s_mov_b32 s2, s0
+; GFX942-NEXT:    s_mov_b32 s3, s0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:48 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:32 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:16 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: zero_init_foo:
 ; GFX10-PAL:       ; %bb.0:
@@ -456,19 +456,19 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: store_load_sindex_kernel:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -604,17 +604,17 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: store_load_sindex_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -744,17 +744,17 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_vindex_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, 0xffc, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_vindex_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, 0xffc, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_sub_u32_e32 v0, 0, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: store_load_vindex_kernel:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -885,19 +885,19 @@ define void @store_load_vindex_foo(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_vindex_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, s32
-; GFX940-NEXT:    v_lshl_add_u32 v1, v0, 2, v1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_vindex_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, s32
+; GFX942-NEXT:    v_lshl_add_u32 v1, v0, 2, v1
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: store_load_vindex_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -994,13 +994,13 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: private_ptr_foo:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 0x41200000
-; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: private_ptr_foo:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0x41200000
+; GFX942-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: private_ptr_foo:
 ; GFX10-PAL:       ; %bb.0:
@@ -1135,21 +1135,21 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:304
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: zero_init_small_offset_kernel:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    scratch_load_dword v0, off, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_mov_b32 s0, 0
-; GFX940-NEXT:    s_mov_b32 s1, s0
-; GFX940-NEXT:    s_mov_b32 s2, s0
-; GFX940-NEXT:    s_mov_b32 s3, s0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:256 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:272 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:288 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:304 sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: zero_init_small_offset_kernel:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    scratch_load_dword v0, off, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_mov_b32 s0, 0
+; GFX942-NEXT:    s_mov_b32 s1, s0
+; GFX942-NEXT:    s_mov_b32 s2, s0
+; GFX942-NEXT:    s_mov_b32 s3, s0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:256 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:272 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:288 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:304 sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: zero_init_small_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0:
@@ -1351,23 +1351,23 @@ define void @zero_init_small_offset_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: zero_init_small_offset_foo:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, off, s32 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_mov_b32 s0, 0
-; GFX940-NEXT:    s_mov_b32 s1, s0
-; GFX940-NEXT:    s_mov_b32 s2, s0
-; GFX940-NEXT:    s_mov_b32 s3, s0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:256 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:272 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:288 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:304 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: zero_init_small_offset_foo:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, off, s32 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_mov_b32 s0, 0
+; GFX942-NEXT:    s_mov_b32 s1, s0
+; GFX942-NEXT:    s_mov_b32 s2, s0
+; GFX942-NEXT:    s_mov_b32 s3, s0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:256 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:272 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:288 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:304 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: zero_init_small_offset_foo:
 ; GFX10-PAL:       ; %bb.0:
@@ -1542,23 +1542,23 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_small_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-NEXT:    scratch_load_dword v0, off, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_addk_i32 s1, 0x100
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s0, 0x100
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_small_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-NEXT:    scratch_load_dword v0, off, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    s_addk_i32 s1, 0x100
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s0, 0x100
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: store_load_sindex_small_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -1759,21 +1759,21 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_small_offset_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    scratch_load_dword v0, off, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_addk_i32 s1, 0x100
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s0, 0x100
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_small_offset_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    scratch_load_dword v0, off, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    s_addk_i32 s1, 0x100
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s0, 0x100
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: store_load_sindex_small_offset_foo:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -1956,19 +1956,19 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_vindex_small_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    scratch_load_dword v1, off, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, 0xffc, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:256 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_sub_u32_e32 v0, 0x100, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_vindex_small_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    scratch_load_dword v1, off, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, 0xffc, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, off offset:256 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_sub_u32_e32 v0, 0x100, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: store_load_vindex_small_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -2145,22 +2145,22 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_vindex_small_offset_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s32, 0x100
-; GFX940-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-NEXT:    v_lshl_add_u32 v1, v0, 2, v1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_vindex_small_offset_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x100
+; GFX942-NEXT:    v_mov_b32_e32 v1, s0
+; GFX942-NEXT:    v_lshl_add_u32 v1, v0, 2, v1
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: store_load_vindex_small_offset_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -2333,22 +2333,22 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: zero_init_large_offset_kernel:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_mov_b32 s0, 0
-; GFX940-NEXT:    s_mov_b32 s1, s0
-; GFX940-NEXT:    s_mov_b32 s2, s0
-; GFX940-NEXT:    s_mov_b32 s3, s0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
-; GFX940-NEXT:    s_movk_i32 s0, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: zero_init_large_offset_kernel:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_mov_b32 s0, 0
+; GFX942-NEXT:    s_mov_b32 s1, s0
+; GFX942-NEXT:    s_mov_b32 s2, s0
+; GFX942-NEXT:    s_mov_b32 s3, s0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-NEXT:    s_movk_i32 s0, 0x4004
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32 sc0 sc1
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: zero_init_large_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0:
@@ -2568,27 +2568,27 @@ define void @zero_init_large_offset_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: zero_init_large_offset_foo:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, off, s32 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_mov_b32 s0, 0
-; GFX940-NEXT:    s_mov_b32 s1, s0
-; GFX940-NEXT:    s_mov_b32 s2, s0
-; GFX940-NEXT:    s_mov_b32 s3, s0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
-; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 sc0 sc1
-; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1
-; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32 sc0 sc1
-; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: zero_init_large_offset_foo:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, off, s32 offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_mov_b32 s0, 0
+; GFX942-NEXT:    s_mov_b32 s1, s0
+; GFX942-NEXT:    s_mov_b32 s2, s0
+; GFX942-NEXT:    s_mov_b32 s3, s0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 sc0 sc1
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32 sc0 sc1
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1010-PAL-LABEL: zero_init_large_offset_foo:
 ; GFX1010-PAL:       ; %bb.0:
@@ -2796,23 +2796,23 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_large_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_addk_i32 s1, 0x4004
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s0, 0x4004
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_large_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    s_addk_i32 s1, 0x4004
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s0, 0x4004
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: store_load_sindex_large_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -3013,21 +3013,21 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_sindex_large_offset_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_addk_i32 s1, 0x4004
-; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_addk_i32 s0, 0x4004
-; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_sindex_large_offset_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_lshl_b32 s1, s0, 2
+; GFX942-NEXT:    s_and_b32 s0, s0, 15
+; GFX942-NEXT:    s_addk_i32 s1, 0x4004
+; GFX942-NEXT:    v_mov_b32_e32 v0, 15
+; GFX942-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX942-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_addk_i32 s0, 0x4004
+; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: store_load_sindex_large_offset_foo:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -3211,20 +3211,20 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_vindex_large_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, 0xffc, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    s_movk_i32 s0, 0x4004
-; GFX940-NEXT:    scratch_store_dword v0, v1, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_sub_u32_e32 v0, 0x4004, v0
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_vindex_large_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, 0xffc, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    s_movk_i32 s0, 0x4004
+; GFX942-NEXT:    scratch_store_dword v0, v1, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_sub_u32_e32 v0, 0x4004, v0
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: store_load_vindex_large_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -3403,23 +3403,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_vindex_large_offset_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX940-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-NEXT:    v_lshl_add_u32 v1, v0, 2, v1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX940-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_vindex_large_offset_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX942-NEXT:    v_mov_b32_e32 v1, s0
+; GFX942-NEXT:    v_lshl_add_u32 v1, v0, 2, v1
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX942-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX942-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: store_load_vindex_large_offset_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -3566,18 +3566,18 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_large_imm_offset_kernel:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    v_mov_b32_e32 v0, 13
-; GFX940-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 0x3000
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:3716 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:3716 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_large_imm_offset_kernel:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; GFX942-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 0x3000
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, off offset:3716 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:3716 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: store_load_large_imm_offset_kernel:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -3738,19 +3738,19 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_large_imm_offset_foo:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 13
-; GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 0x3000
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, s32 offset:3716 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:3716 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_large_imm_offset_foo:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; GFX942-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, 0x3000
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, s32 offset:3716 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, v0, s32 offset:3716 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: store_load_large_imm_offset_foo:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -3889,20 +3889,20 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: store_load_vidx_sidx_offset:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-NEXT:    v_lshl_add_u32 v0, v0, 2, v1
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:1024 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:1024 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: store_load_vidx_sidx_offset:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-NEXT:    v_lshl_add_u32 v0, v0, 2, v1
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, off offset:1024 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dword v0, v0, off offset:1024 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: store_load_vidx_sidx_offset:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -4022,16 +4022,16 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_i64_aligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0
-; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_i64_aligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: store_load_i64_aligned:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -4133,16 +4133,16 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_i64_unaligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0
-; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_i64_unaligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, 15
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: store_load_i64_unaligned:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -4249,17 +4249,17 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_v3i32_unaligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, 1
-; GFX940-NEXT:    v_mov_b32_e32 v3, 2
-; GFX940-NEXT:    v_mov_b32_e32 v4, 3
-; GFX940-NEXT:    scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_v3i32_unaligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, 1
+; GFX942-NEXT:    v_mov_b32_e32 v3, 2
+; GFX942-NEXT:    v_mov_b32_e32 v4, 3
+; GFX942-NEXT:    scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: store_load_v3i32_unaligned:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -4372,18 +4372,18 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_v4i32_unaligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, 1
-; GFX940-NEXT:    v_mov_b32_e32 v3, 2
-; GFX940-NEXT:    v_mov_b32_e32 v4, 3
-; GFX940-NEXT:    v_mov_b32_e32 v5, 4
-; GFX940-NEXT:    scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_v4i32_unaligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, 1
+; GFX942-NEXT:    v_mov_b32_e32 v3, 2
+; GFX942-NEXT:    v_mov_b32_e32 v4, 3
+; GFX942-NEXT:    v_mov_b32_e32 v5, 4
+; GFX942-NEXT:    scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-PAL-LABEL: store_load_v4i32_unaligned:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -4488,16 +4488,16 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg)
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_i32_negative_unaligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v0, -1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_ubyte v0, v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_i32_negative_unaligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v0, -1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_ubyte v0, v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1010-PAL-LABEL: store_load_i32_negative_unaligned:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -4610,16 +4610,16 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: store_load_i32_large_negative_unaligned:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_add_u32_e32 v0, 0xffffef7f, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_ubyte v0, v0, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: store_load_i32_large_negative_unaligned:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_add_u32_e32 v0, 0xffffef7f, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_ubyte v0, v0, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1010-PAL-LABEL: store_load_i32_large_negative_unaligned:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -4792,25 +4792,25 @@ define amdgpu_ps void @large_offset() {
 ; GFX9-PAL-NEXT:    ;;#ASMEND
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: large_offset:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-NEXT:    v_mov_b32_e32 v1, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_mov_b32 s0, 16
-; GFX940-NEXT:    ;;#ASMSTART
-; GFX940-NEXT:    ; use s0
-; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    s_movk_i32 s0, 0x810
-; GFX940-NEXT:    ;;#ASMSTART
-; GFX940-NEXT:    ; use s0
-; GFX940-NEXT:    ;;#ASMEND
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: large_offset:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-NEXT:    v_mov_b32_e32 v1, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_mov_b32 s0, 16
+; GFX942-NEXT:    ;;#ASMSTART
+; GFX942-NEXT:    ; use s0
+; GFX942-NEXT:    ;;#ASMEND
+; GFX942-NEXT:    s_movk_i32 s0, 0x810
+; GFX942-NEXT:    ;;#ASMSTART
+; GFX942-NEXT:    ; use s0
+; GFX942-NEXT:    ;;#ASMEND
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: large_offset:
 ; GFX1010-PAL:       ; %bb.0: ; %bb
@@ -4977,13 +4977,13 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
 ; GFX9-PAL-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_large_offset:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_add_i32 s0, s0, 0xffe8
-; GFX940-NEXT:    scratch_load_dword v2, off, s0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_large_offset:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_add_i32 s0, s0, 0xffe8
+; GFX942-NEXT:    scratch_load_dword v2, off, s0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: sgpr_base_large_offset:
 ; GFX10-PAL:       ; %bb.0: ; %entry
@@ -5082,14 +5082,14 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
 ; GFX9-PAL-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_large_offset_split:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_and_b32 s0, s0, -4
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0x100f000
-; GFX940-NEXT:    scratch_load_dword v2, v2, s0 offset:4072 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_large_offset_split:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_and_b32 s0, s0, -4
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x100f000
+; GFX942-NEXT:    scratch_load_dword v2, v2, s0 offset:4072 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: sgpr_base_large_offset_split:
 ; GFX10-PAL:       ; %bb.0: ; %entry
@@ -5197,15 +5197,15 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_add_i32 s0, s0, s1
-; GFX940-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-NEXT:    v_add_u32_e32 v0, 0xffe8, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_add_i32 s0, s0, s1
+; GFX942-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-NEXT:    v_add_u32_e32 v0, 0xffe8, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -5307,15 +5307,15 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_add_i32 s0, s0, s1
-; GFX940-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX940-NEXT:    v_add_u32_e32 v0, -16, v0
-; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
+; GFX942:       ; %bb.0: ; %bb
+; GFX942-NEXT:    s_add_i32 s0, s0, s1
+; GFX942-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-NEXT:    v_add_u32_e32 v0, -16, v0
+; GFX942-NEXT:    v_mov_b32_e32 v1, 15
+; GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
 ; GFX10-PAL:       ; %bb.0: ; %bb
@@ -5408,13 +5408,13 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ; GFX9-PAL-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: sgpr_base_negative_offset:
-; GFX940:       ; %bb.0: ; %entry
-; GFX940-NEXT:    s_addk_i32 s0, 0xffe8
-; GFX940-NEXT:    scratch_load_dword v2, off, s0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: sgpr_base_negative_offset:
+; GFX942:       ; %bb.0: ; %entry
+; GFX942-NEXT:    s_addk_i32 s0, 0xffe8
+; GFX942-NEXT:    scratch_load_dword v2, off, s0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX10-PAL-LABEL: sgpr_base_negative_offset:
 ; GFX10-PAL:       ; %bb.0: ; %entry
diff --git llvm/test/CodeGen/AMDGPU/fmaximum3.ll llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index c26e2911ab3e..66de7d535db4 100644
--- llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
 
 define float @v_fmaximum3_f32(float %a, float %b, float %c) {
@@ -14,19 +14,19 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32:
 ; GFX950:       ; %bb.0:
@@ -49,19 +49,19 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v2, v0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, v2, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v2, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_commute:
 ; GFX950:       ; %bb.0:
@@ -83,21 +83,21 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    ; return to shader part epilog
 ;
-; GFX940-LABEL: s_fmaximum3_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    v_mov_b32_e32 v0, s1
-; GFX940-NEXT:    v_max_f32_e32 v1, s0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, s2, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX940-NEXT:    ; return to shader part epilog
+; GFX942-LABEL: s_fmaximum3_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    v_mov_b32_e32 v0, s1
+; GFX942-NEXT:    v_max_f32_e32 v1, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, s2, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942-NEXT:    ; return to shader part epilog
 ;
 ; GFX950-LABEL: s_fmaximum3_f32:
 ; GFX950:       ; %bb.0:
@@ -125,19 +125,19 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, |v0|, v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fabs0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v3, |v0|, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fabs0:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v3, |v0|, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fabs0:
 ; GFX950:       ; %bb.0:
@@ -161,19 +161,19 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, |v1|, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fabs1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v3, v0, |v1|
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v1|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fabs1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v3, v0, |v1|
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fabs1:
 ; GFX950:       ; %bb.0:
@@ -197,19 +197,19 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, |v2|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fabs2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v1, v0, |v2|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fabs2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v1, v0, |v2|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fabs2:
 ; GFX950:       ; %bb.0:
@@ -233,19 +233,19 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, |v0|, |v1|, |v2|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v3, |v0|, |v1|
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v1|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v1, v0, |v2|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v3, |v0|, |v1|
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v1, v0, |v2|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fabs_all:
 ; GFX950:       ; %bb.0:
@@ -271,19 +271,19 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, -v0, -v1, -v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v3, -v0, -v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v1, v0, -v2
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v3, -v0, -v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v1, v0, -v2
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg_all:
 ; GFX950:       ; %bb.0:
@@ -309,19 +309,19 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, -|v0|, -|v1|, -|v2|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v3, -|v0|, -|v1|
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v1, v0, -|v2|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -|v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fneg_fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v3, -|v0|, -|v1|
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v1, v0, -|v2|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all:
 ; GFX950:       ; %bb.0:
@@ -350,19 +350,19 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, -v0, v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fneg0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v3, -v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fneg0:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v3, -v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg0:
 ; GFX950:       ; %bb.0:
@@ -386,19 +386,19 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, -v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fneg1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v3, v0, -v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fneg1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v3, v0, -v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg1:
 ; GFX950:       ; %bb.0:
@@ -422,19 +422,19 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, -v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_fneg2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v1, v0, -v2
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_fneg2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v1, v0, -v2
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_fneg2:
 ; GFX950:       ; %bb.0:
@@ -458,19 +458,19 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, 0x41000000, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_const0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, 0x41000000, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_const0:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, 0x41000000, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_const0:
 ; GFX950:       ; %bb.0:
@@ -494,19 +494,19 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, 0x41000000
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32__const2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, 0x41000000, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32__const2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, 0x41000000, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32__const2:
 ; GFX950:       ; %bb.0:
@@ -530,19 +530,19 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, 4.0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_inlineimm0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, 4.0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_inlineimm0:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, 4.0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_inlineimm0:
 ; GFX950:       ; %bb.0:
@@ -565,19 +565,19 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32__inlineimm:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, 4.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32__inlineimm:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, 4.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32__inlineimm:
 ; GFX950:       ; %bb.0:
@@ -602,19 +602,19 @@ define float @v_fmaximum3_f32_const1_const2(float %a) {
 ; GFX12-NEXT:    v_maximum3_f32 v0, v0, s0, 0x41800000
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_f32_const1_const2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v1, 0x41000000, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, 0x41800000, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_f32_const1_const2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v1, 0x41000000, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, 0x41800000, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_f32_const1_const2:
 ; GFX950:       ; %bb.0:
@@ -640,27 +640,27 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
 ; GFX12-NEXT:    v_maximum3_f32 v1, v5, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v6, v1, v3
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v2, v4, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v4, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_max_f32_e32 v2, v5, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v5, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v6, v1, v3
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v4, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v4, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v5, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v5, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32:
 ; GFX950:       ; %bb.0:
@@ -685,27 +685,27 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
 ; GFX12-NEXT:    v_maximum3_f32 v1, v1, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f32_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v6, v1, v3
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v2, v0, v4
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v5
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f32_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v6, v1, v3
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v0, v4
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v5
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32_commute:
 ; GFX950:       ; %bb.0:
@@ -730,27 +730,27 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
 ; GFX12-NEXT:    v_maximum3_f32 v1, |v1|, |v3|, |v5|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f32__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v6, |v1|, |v3|
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX940-NEXT:    v_max_f32_e64 v3, |v0|, |v2|
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v2, v0, |v4|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_max_f32_e64 v2, v1, |v5|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v5|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f32__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v6, |v1|, |v3|
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v3|
+; GFX942-NEXT:    v_max_f32_e64 v3, |v0|, |v2|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v2, v0, |v4|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v4|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_max_f32_e64 v2, v1, |v5|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -778,27 +778,27 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
 ; GFX12-NEXT:    v_maximum3_f32 v1, -v1, -v3, -v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f32__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v6, -v1, -v3
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX940-NEXT:    v_max_f32_e64 v3, -v0, -v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v2, v0, -v4
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_max_f32_e64 v2, v1, -v5
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f32__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v6, -v1, -v3
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v3
+; GFX942-NEXT:    v_max_f32_e64 v3, -v0, -v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v2, v0, -v4
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_max_f32_e64 v2, v1, -v5
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -826,27 +826,27 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
 ; GFX12-NEXT:    v_maximum3_f32 v1, v1, 2.0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v4, 2.0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT:    v_max_f32_e32 v4, 2.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    v_max_f32_e32 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v4, 2.0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX942-NEXT:    v_max_f32_e32 v4, 2.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_max_f32_e32 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -871,27 +871,27 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
 ; GFX12-NEXT:    v_maximum3_f32 v1, v1, v3, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v4, v1, v3
-; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v2, 4.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX940-NEXT:    v_max_f32_e32 v2, 4.0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v4, v1, v3
+; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, 4.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX942-NEXT:    v_max_f32_e32 v2, 4.0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -917,35 +917,35 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
 ; GFX12-NEXT:    v_maximum3_f32 v2, v8, v2, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v9, v2, v5
-; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT:    v_max_f32_e32 v5, v1, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, v6, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v6, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, v7, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v7, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, v8, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v8, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v9, v2, v5
+; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    v_max_f32_e32 v5, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, v6, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v6, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, v7, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v7, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, v8, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v8, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32:
 ; GFX950:       ; %bb.0:
@@ -972,35 +972,35 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
 ; GFX12-NEXT:    v_maximum3_f32 v2, v2, v5, v8
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f32_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v9, v2, v5
-; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT:    v_max_f32_e32 v5, v1, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v6
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, v1, v7
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v7
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, v2, v8
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v8
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f32_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v9, v2, v5
+; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    v_max_f32_e32 v5, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v6
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, v1, v7
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v7
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, v2, v8
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v8
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32_commute:
 ; GFX950:       ; %bb.0:
@@ -1027,35 +1027,35 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
 ; GFX12-NEXT:    v_maximum3_f32 v2, |v2|, |v5|, |v8|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f32__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v9, |v2|, |v5|
-; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX940-NEXT:    v_max_f32_e64 v5, |v1|, |v4|
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX940-NEXT:    v_max_f32_e64 v4, |v0|, |v3|
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v3|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX940-NEXT:    v_max_f32_e64 v3, v0, |v6|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v6|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v3, v1, |v7|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v3, v2, |v8|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v2, |v8|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f32__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v9, |v2|, |v5|
+; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v2|, |v5|
+; GFX942-NEXT:    v_max_f32_e64 v5, |v1|, |v4|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v4|
+; GFX942-NEXT:    v_max_f32_e64 v4, |v0|, |v3|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX942-NEXT:    v_max_f32_e64 v3, v0, |v6|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v3, v1, |v7|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v7|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v3, v2, |v8|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -1085,35 +1085,35 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
 ; GFX12-NEXT:    v_maximum3_f32 v2, -v2, -v5, -v8
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f32__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e64 v9, -v2, -v5
-; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX940-NEXT:    v_max_f32_e64 v5, -v1, -v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX940-NEXT:    v_max_f32_e64 v4, -v0, -v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX940-NEXT:    v_max_f32_e64 v3, v0, -v6
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v3, v1, -v7
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX940-NEXT:    v_max_f32_e64 v3, v2, -v8
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v2, -v8
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f32__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e64 v9, -v2, -v5
+; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v2, -v5
+; GFX942-NEXT:    v_max_f32_e64 v5, -v1, -v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v4
+; GFX942-NEXT:    v_max_f32_e64 v4, -v0, -v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX942-NEXT:    v_max_f32_e64 v3, v0, -v6
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v3, v1, -v7
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v7
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX942-NEXT:    v_max_f32_e64 v3, v2, -v8
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -1143,35 +1143,35 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
 ; GFX12-NEXT:    v_maximum3_f32 v2, v2, 2.0, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v6, 2.0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX940-NEXT:    v_max_f32_e32 v6, 2.0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_max_f32_e32 v6, 2.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_max_f32_e32 v6, v0, v3
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v1, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, v2, v5
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v6, 2.0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX942-NEXT:    v_max_f32_e32 v6, 2.0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_max_f32_e32 v6, 2.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_max_f32_e32 v6, v0, v3
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, v2, v5
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -1198,35 +1198,35 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
 ; GFX12-NEXT:    v_maximum3_f32 v2, v2, v5, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v6, v2, v5
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT:    v_max_f32_e32 v5, v1, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT:    v_max_f32_e32 v4, v0, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, 4.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, 4.0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v3, 4.0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v6, v2, v5
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    v_max_f32_e32 v5, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    v_max_f32_e32 v4, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, 4.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, 4.0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v3, 4.0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -1774,30 +1774,30 @@ define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v2, v0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v2, v1
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v2, v1
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16:
 ; GFX950:       ; %bb.0:
@@ -1824,30 +1824,30 @@ define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f16_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v1, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f16_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16_commute:
 ; GFX950:       ; %bb.0:
@@ -1877,32 +1877,32 @@ define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f16__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
-; GFX940-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
-; GFX940-NEXT:    v_pk_max_f16 v3, v3, v4
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v4, v0, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f16__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
+; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
+; GFX942-NEXT:    v_pk_max_f16 v3, v3, v4
+; GFX942-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v4, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -1935,30 +1935,30 @@ define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f16__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f16__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -1988,30 +1988,30 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v3, v0, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v1
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v3, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v1
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -2038,30 +2038,30 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v0, v4, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v2, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v2, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v0, v4, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -2090,42 +2090,42 @@ define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v5, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v5, v1
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v4, v2
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v5, v1
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v4, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16:
 ; GFX950:       ; %bb.0:
@@ -2155,42 +2155,42 @@ define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f16_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f16_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16_commute:
 ; GFX950:       ; %bb.0:
@@ -2227,46 +2227,46 @@ define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f16__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
-; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
-; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
-; GFX940-NEXT:    v_pk_max_f16 v7, v7, v9
-; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v6, v6, v8
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v8, v0, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v11
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX940-NEXT:    v_perm_b32 v6, v9, v1, s0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX940-NEXT:    v_pk_max_f16 v6, v6, v10
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f16__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
+; GFX942-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
+; GFX942-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
+; GFX942-NEXT:    v_pk_max_f16 v7, v7, v9
+; GFX942-NEXT:    v_mov_b32_e32 v12, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v6, v6, v8
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
+; GFX942-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v8, v0, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v11
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX942-NEXT:    v_perm_b32 v6, v9, v1, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GFX942-NEXT:    v_pk_max_f16 v6, v6, v10
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -2305,42 +2305,42 @@ define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f16__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f16__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -2373,39 +2373,39 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v7, v1, 2.0
-; GFX940-NEXT:    s_mov_b32 s1, 0x5040100
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX940-NEXT:    s_movk_i32 s0, 0x7e00
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX940-NEXT:    v_perm_b32 v4, v5, v0, s1
-; GFX940-NEXT:    v_pk_max_f16 v4, v4, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
-; GFX940-NEXT:    v_pack_b32_f16 v7, v1, s0
-; GFX940-NEXT:    v_pk_max_f16 v7, v7, v3
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v7, v1, 2.0
+; GFX942-NEXT:    s_mov_b32 s1, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_movk_i32 s0, 0x7e00
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v4, v5, v0, s1
+; GFX942-NEXT:    v_pk_max_f16 v4, v4, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
+; GFX942-NEXT:    v_pack_b32_f16 v7, v1, s0
+; GFX942-NEXT:    v_pk_max_f16 v7, v7, v3
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -2435,42 +2435,42 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v4, s0
-; GFX940-NEXT:    v_pk_max_f16 v1, v1, 4.0
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v4, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v4, s0
+; GFX942-NEXT:    v_pk_max_f16 v1, v1, 4.0
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v0, v6, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -2500,48 +2500,48 @@ define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v5, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v4f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v5, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v4, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v4f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v5, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v4, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16:
 ; GFX950:       ; %bb.0:
@@ -2571,48 +2571,48 @@ define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v4f16_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v5
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v4f16_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v5
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16_commute:
 ; GFX950:       ; %bb.0:
@@ -2649,52 +2649,52 @@ define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v4f16__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
-; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
-; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
-; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
-; GFX940-NEXT:    v_pk_max_f16 v7, v7, v9
-; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v6, v6, v8
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v8, v1, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v11
-; GFX940-NEXT:    v_perm_b32 v6, v9, v0, s0
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v6, v6, v10
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX940-NEXT:    v_perm_b32 v1, v3, v1, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v7, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v4f16__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
+; GFX942-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
+; GFX942-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
+; GFX942-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
+; GFX942-NEXT:    v_pk_max_f16 v7, v7, v9
+; GFX942-NEXT:    v_mov_b32_e32 v12, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v6, v6, v8
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
+; GFX942-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v8, v1, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v11
+; GFX942-NEXT:    v_perm_b32 v6, v9, v0, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v6, v6, v10
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX942-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v7, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -2733,48 +2733,48 @@ define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v4f16__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v4f16__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -2807,46 +2807,46 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX940-NEXT:    v_perm_b32 v4, v8, v1, s0
-; GFX940-NEXT:    v_pk_max_f16 v4, v4, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_perm_b32 v8, v5, v0, s0
-; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX940-NEXT:    v_pk_max_f16 v8, v8, v2
-; GFX940-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    v_perm_b32 v1, v7, v1, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX942-NEXT:    v_perm_b32 v4, v8, v1, s0
+; GFX942-NEXT:    v_pk_max_f16 v4, v4, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_perm_b32 v8, v5, v0, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX942-NEXT:    v_pk_max_f16 v8, v8, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -2876,48 +2876,48 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v1, v4, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v4, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v1, v4, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v0, v6, s0
+; GFX942-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -3538,19 +3538,19 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c)
 ; GFX12-NEXT:    v_maximum_f32 v1, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_no_fmaximum3_f32__multi_use:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_no_fmaximum3_f32__multi_use:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_no_fmaximum3_f32__multi_use:
 ; GFX950:       ; %bb.0:
@@ -3573,22 +3573,22 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float
 ; GFX12-NEXT:    s_maximum_f32 s1, s0, s2
 ; GFX12-NEXT:    ; return to shader part epilog
 ;
-; GFX940-LABEL: s_no_fmaximum3_f32__multi_use:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    v_mov_b32_e32 v0, s1
-; GFX940-NEXT:    v_max_f32_e32 v1, s0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    v_max_f32_e32 v1, s2, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_readfirstlane_b32 s1, v1
-; GFX940-NEXT:    ; return to shader part epilog
+; GFX942-LABEL: s_no_fmaximum3_f32__multi_use:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    v_mov_b32_e32 v0, s1
+; GFX942-NEXT:    v_max_f32_e32 v1, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_max_f32_e32 v1, s2, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX942-NEXT:    ; return to shader part epilog
 ;
 ; GFX950-LABEL: s_no_fmaximum3_f32__multi_use:
 ; GFX950:       ; %bb.0:
@@ -3697,30 +3697,30 @@ define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_no_fmaximum3_v2f16__multi_use:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v1, v5, s0
-; GFX940-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v5, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_no_fmaximum3_v2f16__multi_use:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_max_f16 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v1, v5, s0
+; GFX942-NEXT:    v_pk_max_f16 v3, v0, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v5, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_no_fmaximum3_v2f16__multi_use:
 ; GFX950:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/fminimum3.ll llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 234a07849a91..56e0b2c2f06c 100644
--- llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
 
 define float @v_fminimum3_f32(float %a, float %b, float %c) {
@@ -14,19 +14,19 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32:
 ; GFX950:       ; %bb.0:
@@ -49,19 +49,19 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v2, v0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, v2, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v2, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_commute:
 ; GFX950:       ; %bb.0:
@@ -83,21 +83,21 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    ; return to shader part epilog
 ;
-; GFX940-LABEL: s_fminimum3_f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    v_mov_b32_e32 v0, s1
-; GFX940-NEXT:    v_min_f32_e32 v1, s0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, s2, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX940-NEXT:    ; return to shader part epilog
+; GFX942-LABEL: s_fminimum3_f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    v_mov_b32_e32 v0, s1
+; GFX942-NEXT:    v_min_f32_e32 v1, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, s2, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942-NEXT:    ; return to shader part epilog
 ;
 ; GFX950-LABEL: s_fminimum3_f32:
 ; GFX950:       ; %bb.0:
@@ -125,19 +125,19 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, |v0|, v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fabs0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v3, |v0|, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fabs0:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v3, |v0|, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fabs0:
 ; GFX950:       ; %bb.0:
@@ -161,19 +161,19 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, |v1|, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fabs1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v3, v0, |v1|
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v1|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fabs1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v3, v0, |v1|
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fabs1:
 ; GFX950:       ; %bb.0:
@@ -197,19 +197,19 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, v1, |v2|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fabs2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v1, v0, |v2|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fabs2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v1, v0, |v2|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fabs2:
 ; GFX950:       ; %bb.0:
@@ -233,19 +233,19 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, |v0|, |v1|, |v2|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v3, |v0|, |v1|
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v1|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v1, v0, |v2|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v3, |v0|, |v1|
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v1, v0, |v2|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fabs_all:
 ; GFX950:       ; %bb.0:
@@ -271,19 +271,19 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, -v0, -v1, -v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v3, -v0, -v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v1, v0, -v2
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v3, -v0, -v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v1, v0, -v2
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg_all:
 ; GFX950:       ; %bb.0:
@@ -309,19 +309,19 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, -|v0|, -|v1|, -|v2|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fneg_fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v3, -|v0|, -|v1|
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v1, v0, -|v2|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -|v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fneg_fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v3, -|v0|, -|v1|
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v1, v0, -|v2|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -|v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg_fabs_all:
 ; GFX950:       ; %bb.0:
@@ -350,19 +350,19 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, -v0, v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fneg0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v3, -v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fneg0:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v3, -v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg0:
 ; GFX950:       ; %bb.0:
@@ -386,19 +386,19 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, -v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fneg1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v3, v0, -v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fneg1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v3, v0, -v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg1:
 ; GFX950:       ; %bb.0:
@@ -422,19 +422,19 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, v1, -v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_fneg2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v1, v0, -v2
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_fneg2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v1, v0, -v2
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_fneg2:
 ; GFX950:       ; %bb.0:
@@ -458,19 +458,19 @@ define float @v_fminimum3_f32_const0(float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, 0x41000000, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_const0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v2, 0x41000000, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_const0:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v2, 0x41000000, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_const0:
 ; GFX950:       ; %bb.0:
@@ -494,19 +494,19 @@ define float @v_fminimum3_f32__const2(float %a, float %b) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, v1, 0x41000000
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32__const2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, 0x41000000, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32__const2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, 0x41000000, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32__const2:
 ; GFX950:       ; %bb.0:
@@ -530,19 +530,19 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, 4.0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_inlineimm0:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v2, 4.0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_inlineimm0:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v2, 4.0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_inlineimm0:
 ; GFX950:       ; %bb.0:
@@ -565,19 +565,19 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, v1, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32__inlineimm:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, 4.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32__inlineimm:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v2, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, 4.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32__inlineimm:
 ; GFX950:       ; %bb.0:
@@ -602,19 +602,19 @@ define float @v_fminimum3_f32_const1_const2(float %a) {
 ; GFX12-NEXT:    v_minimum3_f32 v0, v0, s0, 0x41800000
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_f32_const1_const2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v1, 0x41000000, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, 0x41800000, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_f32_const1_const2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v1, 0x41000000, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, 0x41800000, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_f32_const1_const2:
 ; GFX950:       ; %bb.0:
@@ -640,27 +640,27 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
 ; GFX12-NEXT:    v_minimum3_f32 v1, v5, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v6, v1, v3
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v2, v4, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v4, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_min_f32_e32 v2, v5, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v5, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v6, v1, v3
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v4, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v4, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v5, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v5, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32:
 ; GFX950:       ; %bb.0:
@@ -685,27 +685,27 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
 ; GFX12-NEXT:    v_minimum3_f32 v1, v1, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f32_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v6, v1, v3
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v2, v0, v4
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_min_f32_e32 v2, v1, v5
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f32_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v6, v1, v3
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v0, v4
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, v1, v5
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32_commute:
 ; GFX950:       ; %bb.0:
@@ -730,27 +730,27 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
 ; GFX12-NEXT:    v_minimum3_f32 v1, |v1|, |v3|, |v5|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f32__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v6, |v1|, |v3|
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v3|
-; GFX940-NEXT:    v_min_f32_e64 v3, |v0|, |v2|
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v2, v0, |v4|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_min_f32_e64 v2, v1, |v5|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v5|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f32__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v6, |v1|, |v3|
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v3|
+; GFX942-NEXT:    v_min_f32_e64 v3, |v0|, |v2|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v2, v0, |v4|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v4|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_min_f32_e64 v2, v1, |v5|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v5|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -778,27 +778,27 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
 ; GFX12-NEXT:    v_minimum3_f32 v1, -v1, -v3, -v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f32__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v6, -v1, -v3
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v3
-; GFX940-NEXT:    v_min_f32_e64 v3, -v0, -v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v2, v0, -v4
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_min_f32_e64 v2, v1, -v5
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f32__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v6, -v1, -v3
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v3
+; GFX942-NEXT:    v_min_f32_e64 v3, -v0, -v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v2, v0, -v4
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_min_f32_e64 v2, v1, -v5
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -826,27 +826,27 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
 ; GFX12-NEXT:    v_minimum3_f32 v1, v1, 2.0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f32__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v4, 2.0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT:    v_min_f32_e32 v4, 2.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    v_min_f32_e32 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f32__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v4, 2.0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX942-NEXT:    v_min_f32_e32 v4, 2.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    v_min_f32_e32 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -871,27 +871,27 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
 ; GFX12-NEXT:    v_minimum3_f32 v1, v1, v3, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f32__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v4, v1, v3
-; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v2, 4.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX940-NEXT:    v_min_f32_e32 v2, 4.0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f32__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v4, v1, v3
+; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, 4.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX942-NEXT:    v_min_f32_e32 v2, 4.0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f32__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -917,35 +917,35 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
 ; GFX12-NEXT:    v_minimum3_f32 v2, v8, v2, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f32:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v9, v2, v5
-; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT:    v_min_f32_e32 v5, v1, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, v6, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v6, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, v7, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v7, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, v8, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v8, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f32:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v9, v2, v5
+; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    v_min_f32_e32 v5, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, v6, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v6, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, v7, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v7, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, v8, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v8, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32:
 ; GFX950:       ; %bb.0:
@@ -972,35 +972,35 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
 ; GFX12-NEXT:    v_minimum3_f32 v2, v2, v5, v8
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f32_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v9, v2, v5
-; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT:    v_min_f32_e32 v5, v1, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v6
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, v1, v7
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v7
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, v2, v8
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v8
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f32_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v9, v2, v5
+; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    v_min_f32_e32 v5, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v6
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, v1, v7
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v7
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, v2, v8
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v8
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32_commute:
 ; GFX950:       ; %bb.0:
@@ -1027,35 +1027,35 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
 ; GFX12-NEXT:    v_minimum3_f32 v2, |v2|, |v5|, |v8|
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f32__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v9, |v2|, |v5|
-; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v2|, |v5|
-; GFX940-NEXT:    v_min_f32_e64 v5, |v1|, |v4|
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v4|
-; GFX940-NEXT:    v_min_f32_e64 v4, |v0|, |v3|
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v3|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX940-NEXT:    v_min_f32_e64 v3, v0, |v6|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v6|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v3, v1, |v7|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v3, v2, |v8|
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v2, |v8|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f32__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v9, |v2|, |v5|
+; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v2|, |v5|
+; GFX942-NEXT:    v_min_f32_e64 v5, |v1|, |v4|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v4|
+; GFX942-NEXT:    v_min_f32_e64 v4, |v0|, |v3|
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX942-NEXT:    v_min_f32_e64 v3, v0, |v6|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v6|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v3, v1, |v7|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v7|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v3, v2, |v8|
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v2, |v8|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -1085,35 +1085,35 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
 ; GFX12-NEXT:    v_minimum3_f32 v2, -v2, -v5, -v8
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f32__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e64 v9, -v2, -v5
-; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v2, -v5
-; GFX940-NEXT:    v_min_f32_e64 v5, -v1, -v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v4
-; GFX940-NEXT:    v_min_f32_e64 v4, -v0, -v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
-; GFX940-NEXT:    v_min_f32_e64 v3, v0, -v6
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v3, v1, -v7
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
-; GFX940-NEXT:    v_min_f32_e64 v3, v2, -v8
-; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v2, -v8
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f32__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e64 v9, -v2, -v5
+; GFX942-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v2, -v5
+; GFX942-NEXT:    v_min_f32_e64 v5, -v1, -v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v4
+; GFX942-NEXT:    v_min_f32_e64 v4, -v0, -v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX942-NEXT:    v_min_f32_e64 v3, v0, -v6
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v3, v1, -v7
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v7
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX942-NEXT:    v_min_f32_e64 v3, v2, -v8
+; GFX942-NEXT:    v_cmp_o_f32_e64 vcc, v2, -v8
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -1143,35 +1143,35 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
 ; GFX12-NEXT:    v_minimum3_f32 v2, v2, 2.0, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f32__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v6, 2.0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX940-NEXT:    v_min_f32_e32 v6, 2.0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX940-NEXT:    v_min_f32_e32 v6, 2.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_min_f32_e32 v6, v0, v3
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT:    v_min_f32_e32 v3, v1, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, v2, v5
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f32__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v6, 2.0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX942-NEXT:    v_min_f32_e32 v6, 2.0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX942-NEXT:    v_min_f32_e32 v6, 2.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_min_f32_e32 v6, v0, v3
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    v_min_f32_e32 v3, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, v2, v5
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -1198,35 +1198,35 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
 ; GFX12-NEXT:    v_minimum3_f32 v2, v2, v5, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f32__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v6, v2, v5
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
-; GFX940-NEXT:    v_min_f32_e32 v5, v1, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
-; GFX940-NEXT:    v_min_f32_e32 v4, v0, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, 4.0, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, 4.0, v1
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v3, 4.0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f32__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v6, v2, v5
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
+; GFX942-NEXT:    v_min_f32_e32 v5, v1, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
+; GFX942-NEXT:    v_min_f32_e32 v4, v0, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, 4.0, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, 4.0, v1
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v3, 4.0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f32__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -1774,30 +1774,30 @@ define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v2, v0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v2, v1
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v2, v1
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16:
 ; GFX950:       ; %bb.0:
@@ -1824,30 +1824,30 @@ define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f16_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v1, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f16_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16_commute:
 ; GFX950:       ; %bb.0:
@@ -1877,32 +1877,32 @@ define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f16__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
-; GFX940-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
-; GFX940-NEXT:    v_pk_min_f16 v3, v3, v4
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v4, v0, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v1, v5
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f16__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
+; GFX942-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
+; GFX942-NEXT:    v_pk_min_f16 v3, v3, v4
+; GFX942-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v4, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -1935,30 +1935,30 @@ define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f16__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f16__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -1988,30 +1988,30 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f16__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v3, v0, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v1
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f16__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v3, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v1
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -2038,30 +2038,30 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v2f16__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v0, v4, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v2, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v2f16__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v2, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v3, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v0, v4, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v2f16__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -2090,42 +2090,42 @@ define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v5, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v5, v1
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v4, v2
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v5, v1
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v4, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16:
 ; GFX950:       ; %bb.0:
@@ -2155,42 +2155,42 @@ define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f16_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v1, v5
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f16_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16_commute:
 ; GFX950:       ; %bb.0:
@@ -2227,46 +2227,46 @@ define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f16__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
-; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
-; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
-; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
-; GFX940-NEXT:    v_pk_min_f16 v7, v7, v9
-; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v6, v6, v8
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v8, v0, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v11
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX940-NEXT:    v_perm_b32 v6, v9, v1, s0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX940-NEXT:    v_pk_min_f16 v6, v6, v10
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f16__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
+; GFX942-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
+; GFX942-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
+; GFX942-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
+; GFX942-NEXT:    v_pk_min_f16 v7, v7, v9
+; GFX942-NEXT:    v_mov_b32_e32 v12, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v6, v6, v8
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
+; GFX942-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v8, v0, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v11
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX942-NEXT:    v_perm_b32 v6, v9, v1, s0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GFX942-NEXT:    v_pk_min_f16 v6, v6, v10
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -2305,42 +2305,42 @@ define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f16__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f16__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -2373,39 +2373,39 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f16__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v7, v1, 2.0
-; GFX940-NEXT:    s_mov_b32 s1, 0x5040100
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX940-NEXT:    s_movk_i32 s0, 0x7e00
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX940-NEXT:    v_perm_b32 v4, v5, v0, s1
-; GFX940-NEXT:    v_pk_min_f16 v4, v4, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
-; GFX940-NEXT:    v_pack_b32_f16 v7, v1, s0
-; GFX940-NEXT:    v_pk_min_f16 v7, v7, v3
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f16__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v7, v1, 2.0
+; GFX942-NEXT:    s_mov_b32 s1, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_movk_i32 s0, 0x7e00
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v4, v5, v0, s1
+; GFX942-NEXT:    v_pk_min_f16 v4, v4, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
+; GFX942-NEXT:    v_pack_b32_f16 v7, v1, s0
+; GFX942-NEXT:    v_pk_min_f16 v7, v7, v3
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -2435,42 +2435,42 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v3f16__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v4, s0
-; GFX940-NEXT:    v_pk_min_f16 v1, v1, 4.0
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v3f16__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v4, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v4, s0
+; GFX942-NEXT:    v_pk_min_f16 v1, v1, 4.0
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    v_perm_b32 v2, v0, v6, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v3f16__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -2500,48 +2500,48 @@ define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v5, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v4f16:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v5, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v4, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v4f16:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v5, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v4, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16:
 ; GFX950:       ; %bb.0:
@@ -2571,48 +2571,48 @@ define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v4f16_commute:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v5
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v4f16_commute:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v5
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16_commute:
 ; GFX950:       ; %bb.0:
@@ -2649,52 +2649,52 @@ define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v4f16__fabs_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
-; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
-; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
-; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
-; GFX940-NEXT:    v_pk_min_f16 v7, v7, v9
-; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v6, v6, v8
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v8, v1, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v11
-; GFX940-NEXT:    v_perm_b32 v6, v9, v0, s0
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v6, v6, v10
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX940-NEXT:    v_perm_b32 v1, v3, v1, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v7, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v4f16__fabs_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
+; GFX942-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
+; GFX942-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
+; GFX942-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
+; GFX942-NEXT:    v_pk_min_f16 v7, v7, v9
+; GFX942-NEXT:    v_mov_b32_e32 v12, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v6, v6, v8
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
+; GFX942-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v8, v1, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v11
+; GFX942-NEXT:    v_perm_b32 v6, v9, v0, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v6, v6, v10
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX942-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v7, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16__fabs_all:
 ; GFX950:       ; %bb.0:
@@ -2733,48 +2733,48 @@ define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v4f16__fneg_all:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v4f16__fneg_all:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX942-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16__fneg_all:
 ; GFX950:       ; %bb.0:
@@ -2807,46 +2807,46 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v4f16__inlineimm1:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX940-NEXT:    v_perm_b32 v4, v8, v1, s0
-; GFX940-NEXT:    v_pk_min_f16 v4, v4, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_perm_b32 v8, v5, v0, s0
-; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX940-NEXT:    v_pk_min_f16 v8, v8, v2
-; GFX940-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    v_perm_b32 v1, v7, v1, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v4f16__inlineimm1:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX942-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX942-NEXT:    v_perm_b32 v4, v8, v1, s0
+; GFX942-NEXT:    v_pk_min_f16 v4, v4, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_perm_b32 v8, v5, v0, s0
+; GFX942-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX942-NEXT:    v_pk_min_f16 v8, v8, v2
+; GFX942-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16__inlineimm1:
 ; GFX950:       ; %bb.0:
@@ -2876,48 +2876,48 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_fminimum3_v4f16__inlineimm2:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v1, v4, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX940-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v0, v4, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_fminimum3_v4f16__inlineimm2:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v4, v0, v2
+; GFX942-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v1, v4, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v2, v0, v6, s0
+; GFX942-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX942-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_fminimum3_v4f16__inlineimm2:
 ; GFX950:       ; %bb.0:
@@ -3538,19 +3538,19 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c)
 ; GFX12-NEXT:    v_minimum_f32 v1, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_no_fminimum3_f32__multi_use:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_min_f32_e32 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, v0, v2
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_no_fminimum3_f32__multi_use:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_min_f32_e32 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, v0, v2
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_no_fminimum3_f32__multi_use:
 ; GFX950:       ; %bb.0:
@@ -3573,22 +3573,22 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float
 ; GFX12-NEXT:    s_minimum_f32 s1, s0, s2
 ; GFX12-NEXT:    ; return to shader part epilog
 ;
-; GFX940-LABEL: s_no_fminimum3_f32__multi_use:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    v_mov_b32_e32 v0, s1
-; GFX940-NEXT:    v_min_f32_e32 v1, s0, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX940-NEXT:    v_min_f32_e32 v1, s2, v0
-; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
-; GFX940-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_readfirstlane_b32 s1, v1
-; GFX940-NEXT:    ; return to shader part epilog
+; GFX942-LABEL: s_no_fminimum3_f32__multi_use:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    v_mov_b32_e32 v0, s1
+; GFX942-NEXT:    v_min_f32_e32 v1, s0, v0
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_min_f32_e32 v1, s2, v0
+; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX942-NEXT:    ; return to shader part epilog
 ;
 ; GFX950-LABEL: s_no_fminimum3_f32__multi_use:
 ; GFX950:       ; %bb.0:
@@ -3697,30 +3697,30 @@ define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: v_no_fminimum3_v2f16__multi_use:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v0, v1, v5, s0
-; GFX940-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX940-NEXT:    v_perm_b32 v1, v1, v5, s0
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: v_no_fminimum3_v2f16__multi_use:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_pk_min_f16 v3, v0, v1
+; GFX942-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX942-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v0, v1, v5, s0
+; GFX942-NEXT:    v_pk_min_f16 v3, v0, v2
+; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX942-NEXT:    v_perm_b32 v1, v1, v5, s0
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_no_fminimum3_v2f16__multi_use:
 ; GFX950:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
index e94546fd5e8a..a9b3eaf4c33a 100644
--- llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
+++ llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX908
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
 
 ---
 name: test_sgpr_init_multiuse
diff --git llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir
index baaca76bfd8a..b3658080aae0 100644
--- llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir
+++ llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ---
 name:            fold_zero_high_bits_src1_alive
diff --git llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
similarity index 65%
rename from llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll
rename to llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
index 4216bdf409ed..d683bf4f778b 100644
--- llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll
+++ llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s -check-prefix=GFX940
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 | FileCheck %s -check-prefix=GFX942
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefix=GFX12
 
 declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1)
 declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
 
 define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
-; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: flat_atomic_fadd_f32_noret_pat:
 ; GFX12:       ; %bb.0:
@@ -34,17 +34,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
 }
 
 define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
-; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
 ; GFX12:       ; %bb.0:
@@ -61,15 +61,15 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
   ret void
 }
 define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
-; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_atomic_fadd_f32_rtn_pat:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: flat_atomic_fadd_f32_rtn_pat:
 ; GFX12:       ; %bb.0:
@@ -90,15 +90,15 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
 }
 
 define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr, <2 x half> %data) {
-; GFX940-LABEL: local_atomic_fadd_v2f16_noret:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, s0
-; GFX940-NEXT:    v_mov_b32_e32 v1, s1
-; GFX940-NEXT:    ds_pk_add_f16 v0, v1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_v2f16_noret:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    ds_pk_add_f16 v0, v1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: local_atomic_fadd_v2f16_noret:
 ; GFX12:       ; %bb.0:
@@ -114,12 +114,12 @@ define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr,
 }
 
 define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half> %data) {
-; GFX940-LABEL: local_atomic_fadd_v2f16_rtn:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    ds_pk_add_rtn_f16 v0, v0, v1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fadd_v2f16_rtn:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    ds_pk_add_rtn_f16 v0, v0, v1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: local_atomic_fadd_v2f16_rtn:
 ; GFX12:       ; %bb.0:
@@ -138,15 +138,15 @@ define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half>
 }
 
 define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr, <2 x i16> %data) {
-; GFX940-LABEL: local_atomic_fadd_v2bf16_noret:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, s0
-; GFX940-NEXT:    v_mov_b32_e32 v1, s1
-; GFX940-NEXT:    ds_pk_add_bf16 v0, v1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_v2bf16_noret:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942-NEXT:    v_mov_b32_e32 v1, s1
+; GFX942-NEXT:    ds_pk_add_bf16 v0, v1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: local_atomic_fadd_v2bf16_noret:
 ; GFX12:       ; %bb.0:
@@ -162,12 +162,12 @@ define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr,
 }
 
 define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16> %data) {
-; GFX940-LABEL: local_atomic_fadd_v2bf16_rtn:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    ds_pk_add_rtn_bf16 v0, v0, v1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fadd_v2bf16_rtn:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    ds_pk_add_rtn_bf16 v0, v0, v1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: local_atomic_fadd_v2bf16_rtn:
 ; GFX12:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
index 4aec2ffead43..694994e0a82b 100644
--- llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX90A
-; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX940
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX942
 
 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
@@ -28,16 +28,16 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub
 ; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_add_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_add_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -51,12 +51,12 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -79,20 +79,20 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -111,16 +111,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8)
 ; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -134,12 +134,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -162,20 +162,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -194,16 +194,16 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d
 ; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_add_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_add_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -217,12 +217,12 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -245,20 +245,20 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -277,16 +277,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(
 ; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -300,12 +300,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -328,20 +328,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -360,16 +360,16 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub
 ; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_min_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_min_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -383,12 +383,12 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -411,20 +411,20 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -443,16 +443,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8)
 ; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -466,12 +466,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -494,20 +494,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -526,16 +526,16 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d
 ; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_min_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_min_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -549,12 +549,12 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -577,20 +577,20 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -609,16 +609,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(
 ; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -632,12 +632,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -660,20 +660,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -692,16 +692,16 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub
 ; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_max_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_max_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -715,12 +715,12 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -743,20 +743,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -775,16 +775,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8)
 ; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -798,12 +798,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr undef
@@ -826,20 +826,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -858,16 +858,16 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d
 ; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_max_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_max_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -881,12 +881,12 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -909,20 +909,20 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -941,16 +941,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(
 ; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s8
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s8, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s8
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -964,12 +964,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr
 ; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr undef
@@ -992,20 +992,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add
 ; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
-; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT:    v_mov_b32_e32 v2, s10
-; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX942-NEXT:    s_load_dword s10, s[4:5], 0x3c
+; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -1027,17 +1027,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1056,17 +1056,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_agent:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1087,17 +1087,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_system:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1116,17 +1116,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_flush:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1145,15 +1145,15 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1170,15 +1170,15 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1197,15 +1197,15 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_system:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat_system:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1235,17 +1235,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v2, 0
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
   ret void
@@ -1266,17 +1266,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1] sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1] sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1295,17 +1295,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1326,17 +1326,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1] sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_system:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1] sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1355,15 +1355,15 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1380,15 +1380,15 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
 ; GFX90A-NEXT:    buffer_wbinvl1_vol
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1408,16 +1408,16 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1447,17 +1447,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
-; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
+; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1
   ret void
@@ -1475,16 +1475,16 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, do
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_noret:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dword s2, s[4:5], 0x24
-; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s2
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
-; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_f64_noret:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x24
+; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s2
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
   ret void
@@ -1500,14 +1500,14 @@ define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_rtn:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    v_mov_b32_e32 v2, v1
-; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fadd_f64_rtn:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    v_mov_b32_e32 v2, v1
+; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
   ret double %ret
@@ -1525,15 +1525,15 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_noret_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_f64_noret_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1551,15 +1551,15 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_f64_noret_pat_flush:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1577,15 +1577,15 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
-; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v2, s0
-; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX942-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, s0
+; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
   ret void
@@ -1601,13 +1601,13 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_rtn_pat:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
-; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fadd_f64_rtn_pat:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1623,14 +1623,14 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, doub
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    v_mov_b32_e32 v2, v1
-; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    v_mov_b32_e32 v2, v1
+; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
   ret double %ret
@@ -1646,14 +1646,14 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
-; GFX940:       ; %bb.0: ; %main_body
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    v_mov_b32_e32 v2, v1
-; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
+; GFX942:       ; %bb.0: ; %main_body
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    v_mov_b32_e32 v2, v1
+; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 main_body:
   %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
   ret double %ret
diff --git llvm/test/CodeGen/AMDGPU/gfx940-hazards.mir llvm/test/CodeGen/AMDGPU/gfx942-hazards.mir
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/gfx940-hazards.mir
rename to llvm/test/CodeGen/AMDGPU/gfx942-hazards.mir
index 348beb7bcf3c..2c760baf8a5e 100644
--- llvm/test/CodeGen/AMDGPU/gfx940-hazards.mir
+++ llvm/test/CodeGen/AMDGPU/gfx942-hazards.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
 
 # GCN-LABEL: name: trans32_write_non_trans32_read
 # GCN:      V_RCP_F32
diff --git llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index 8a7762fb4b6c..8eb6cecd6ba9 100644
--- llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX908_GFX11_GFX12,GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX908_GFX11_GFX12,GFX11_GFX12 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX908_GFX11_GFX12,GFX11_GFX12 %s
 
@@ -18,17 +18,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1)
   ; GFX908_GFX11_GFX12-NEXT:   GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
   ; GFX908_GFX11_GFX12-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret void
 }
@@ -96,67 +96,67 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
   ; GFX908-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   successors: %bb.1(0x40000000), %bb.4(0x40000000)
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
-  ; GFX90A_GFX940-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; GFX90A_GFX940-NEXT:   S_BRANCH %bb.1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT: bb.1 (%ir-block.5):
-  ; GFX90A_GFX940-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
-  ; GFX90A_GFX940-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; GFX90A_GFX940-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GFX90A_GFX940-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY]], 0, [[V_MOV_B32_e32_]], killed [[DEF]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
-  ; GFX90A_GFX940-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
-  ; GFX90A_GFX940-NEXT:   early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; GFX90A_GFX940-NEXT:   S_BRANCH %bb.2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT: bb.2 (%ir-block.31):
-  ; GFX90A_GFX940-NEXT:   successors: %bb.3(0x80000000)
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY %1
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_1]], [[COPY8]], [[COPY3]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT: bb.3.Flow:
-  ; GFX90A_GFX940-NEXT:   successors: %bb.4(0x80000000)
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT: bb.4 (%ir-block.33):
-  ; GFX90A_GFX940-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   successors: %bb.1(0x40000000), %bb.4(0x40000000)
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
+  ; GFX90A_GFX942-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX90A_GFX942-NEXT:   S_BRANCH %bb.1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT: bb.1 (%ir-block.5):
+  ; GFX90A_GFX942-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
+  ; GFX90A_GFX942-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; GFX90A_GFX942-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GFX90A_GFX942-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY]], 0, [[V_MOV_B32_e32_]], killed [[DEF]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
+  ; GFX90A_GFX942-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
+  ; GFX90A_GFX942-NEXT:   early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX90A_GFX942-NEXT:   S_BRANCH %bb.2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT: bb.2 (%ir-block.31):
+  ; GFX90A_GFX942-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY %1
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_1]], [[COPY8]], [[COPY3]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT: bb.3.Flow:
+  ; GFX90A_GFX942-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT: bb.4 (%ir-block.33):
+  ; GFX90A_GFX942-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   ;
   ; GFX11_GFX12-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
   ; GFX11_GFX12: bb.0 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 1fb34abb41a2..26e0ec9892c4 100644
--- llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX90A %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s
 
 define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw
   ; GFX11: bb.0 (%ir-block.0):
@@ -117,87 +117,87 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
   ; GFX90A-NEXT:   $vgpr0 = COPY [[PHI]]
   ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
-  ; GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
-  ; GFX940: bb.0 (%ir-block.0):
-  ; GFX940-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
-  ; GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
-  ; GFX940-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
-  ; GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX940-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.1
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.1 (%ir-block.5):
-  ; GFX940-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
-  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
-  ; GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
-  ; GFX940-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; GFX940-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec
-  ; GFX940-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
-  ; GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-  ; GFX940-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GFX940-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY]], 0, [[V_MOV_B32_e32_]], killed [[DEF1]], implicit $exec
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
-  ; GFX940-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
-  ; GFX940-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
-  ; GFX940-NEXT:   early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
-  ; GFX940-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
-  ; GFX940-NEXT:   [[COPY8:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]]
-  ; GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GFX940-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.2
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.2 (%ir-block.32):
-  ; GFX940-NEXT:   successors: %bb.4(0x80000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX940-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY %2
-  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_1]], [[COPY9]], [[COPY3]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
-  ; GFX940-NEXT:   S_BRANCH %bb.4
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.3.Flow:
-  ; GFX940-NEXT:   successors: %bb.5(0x80000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %8, %bb.4
-  ; GFX940-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.5
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.4 (%ir-block.35):
-  ; GFX940-NEXT:   successors: %bb.3(0x80000000)
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF2]], %bb.1, [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.2
-  ; GFX940-NEXT:   SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; GFX940-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec
-  ; GFX940-NEXT:   early-clobber %45:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec
-  ; GFX940-NEXT:   [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_READFIRSTLANE_B32_]], 0, killed %45, 0, 0, implicit $mode, implicit $exec
-  ; GFX940-NEXT:   [[COPY10:%[0-9]+]]:sreg_64_xexec = COPY [[COPY8]]
-  ; GFX940-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
-  ; GFX940-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[V_ADD_F32_e64_6]], 0, [[COPY11]], [[COPY10]], implicit $exec
-  ; GFX940-NEXT:   S_BRANCH %bb.3
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT: bb.5 (%ir-block.41):
-  ; GFX940-NEXT:   $vgpr0 = COPY [[PHI]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX942-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
+  ; GFX942: bb.0 (%ir-block.0):
+  ; GFX942-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
+  ; GFX942-NEXT:   [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
+  ; GFX942-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX942-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.1
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.1 (%ir-block.5):
+  ; GFX942-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec
+  ; GFX942-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1
+  ; GFX942-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0
+  ; GFX942-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; GFX942-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec
+  ; GFX942-NEXT:   [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
+  ; GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
+  ; GFX942-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GFX942-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY]], 0, [[V_MOV_B32_e32_]], killed [[DEF1]], implicit $exec
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
+  ; GFX942-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
+  ; GFX942-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
+  ; GFX942-NEXT:   early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
+  ; GFX942-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
+  ; GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]]
+  ; GFX942-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX942-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.2
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.2 (%ir-block.32):
+  ; GFX942-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY %2
+  ; GFX942-NEXT:   [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_1]], [[COPY9]], [[COPY3]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
+  ; GFX942-NEXT:   S_BRANCH %bb.4
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.3.Flow:
+  ; GFX942-NEXT:   successors: %bb.5(0x80000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %8, %bb.4
+  ; GFX942-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.5
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.4 (%ir-block.35):
+  ; GFX942-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF2]], %bb.1, [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.2
+  ; GFX942-NEXT:   SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec
+  ; GFX942-NEXT:   early-clobber %45:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec
+  ; GFX942-NEXT:   [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_READFIRSTLANE_B32_]], 0, killed %45, 0, 0, implicit $mode, implicit $exec
+  ; GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_64_xexec = COPY [[COPY8]]
+  ; GFX942-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX942-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[V_ADD_F32_e64_6]], 0, [[COPY11]], [[COPY10]], implicit $exec
+  ; GFX942-NEXT:   S_BRANCH %bb.3
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT: bb.5 (%ir-block.41):
+  ; GFX942-NEXT:   $vgpr0 = COPY [[PHI]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   ;
   ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
   ; GFX11: bb.0 (%ir-block.0):
diff --git llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
index ba94a53dff03..fa4e7f87853d 100644
--- llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
+++ llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX90A %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
 
 define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
   ret void
 }
@@ -67,66 +67,66 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %
   ; GFX90A-NEXT:   $sgpr1 = COPY [[COPY13]]
   ; GFX90A-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   ;
-  ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
-  ; GFX940: bb.0 (%ir-block.0):
-  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-  ; GFX940-NEXT: {{  $}}
-  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
-  ; GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
-  ; GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
-  ; GFX940-NEXT:   $sgpr0 = COPY [[COPY6]]
-  ; GFX940-NEXT:   $sgpr1 = COPY [[COPY7]]
-  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX942-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
+  ; GFX942: bb.0 (%ir-block.0):
+  ; GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX942-NEXT: {{  $}}
+  ; GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX942-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX942-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX942-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
+  ; GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0
+  ; GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1
+  ; GFX942-NEXT:   $sgpr0 = COPY [[COPY6]]
+  ; GFX942-NEXT:   $sgpr1 = COPY [[COPY7]]
+  ; GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic
   ret double %ret
 }
 
 define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
   ret void
 }
 
 define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
-  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
-  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY5]]
-  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY6]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0
+  ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1
+  ; GFX90A_GFX942-NEXT:   $sgpr0 = COPY [[COPY5]]
+  ; GFX90A_GFX942-NEXT:   $sgpr1 = COPY [[COPY6]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
   ret double %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll
index 02e425e6d10a..82cec179e72d 100644
--- llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 
 define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
@@ -20,17 +20,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2
   ; GFX908-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret void
 }
@@ -48,17 +48,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in
   ; GFX908-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret void
 }
@@ -76,17 +76,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat(ptr addrspace(1) %pt
   ; GFX908-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret void
 }
@@ -104,17 +104,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat(ptr addrspace(
   ; GFX908-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret void
 }
diff --git llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll
index 794a52b6900e..b9e833a7105d 100644
--- llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll
+++ llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll
@@ -1,73 +1,73 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn(ptr addrspace(1) inreg %ptr, <2 x half> %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat(ptr addrspace(1) %ptr, <2 x half> %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn_flat
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn_flat
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret <2 x half> %ret
 }
 
 define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat(ptr addrspace(1) inreg %ptr, <2 x half> %data) {
-  ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn_flat
-  ; GFX90A_GFX940: bb.0 (%ir-block.0):
-  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
-  ; GFX90A_GFX940-NEXT: {{  $}}
-  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
-  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
-  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
-  ; GFX90A_GFX940-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]]
-  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
+  ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn_flat
+  ; GFX90A_GFX942: bb.0 (%ir-block.0):
+  ; GFX90A_GFX942-NEXT:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GFX90A_GFX942-NEXT: {{  $}}
+  ; GFX90A_GFX942-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A_GFX942-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX90A_GFX942-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; GFX90A_GFX942-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A_GFX942-NEXT:   [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1)
+  ; GFX90A_GFX942-NEXT:   $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]]
+  ; GFX90A_GFX942-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
   ret <2 x half> %ret
 }
diff --git llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll
index 73b4428b03c8..904ef8a4b657 100644
--- llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll
+++ llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -27,14 +27,14 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -211,14 +211,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -397,14 +397,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_gr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -593,14 +593,14 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -771,14 +771,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -952,14 +952,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:-2048
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:-2048
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1142,14 +1142,14 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1331,14 +1331,14 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1514,14 +1514,14 @@ define float @global_agent_atomic_fadd_ret_f32_maybe_remote(ptr addrspace(1) %pt
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote:
 ; GFX11:       ; %bb.0:
@@ -1716,14 +1716,14 @@ define float @global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denor
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -1918,14 +1918,14 @@ define void @global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_deno
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -2113,14 +2113,14 @@ define float @global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2299,14 +2299,14 @@ define float @global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -2471,14 +2471,14 @@ define float @global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode(ptr a
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -2673,14 +2673,14 @@ define void @global_agent_atomic_fadd_noret_f32_maybe_remote(ptr addrspace(1) %p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote:
 ; GFX11:       ; %bb.0:
@@ -2868,14 +2868,14 @@ define void @global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3049,14 +3049,14 @@ define void @global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -3204,14 +3204,14 @@ define void @global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -3399,14 +3399,14 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory(ptr addr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -3599,14 +3599,14 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory(ptr add
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -3791,14 +3791,14 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -3991,14 +3991,14 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -4183,14 +4183,14 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4367,14 +4367,14 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4549,14 +4549,14 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memo
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4719,14 +4719,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -4891,14 +4891,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5073,14 +5073,14 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_mem
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5225,14 +5225,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_f
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5380,14 +5380,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_f
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:-2048
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:-2048
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5544,14 +5544,14 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_f
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5719,14 +5719,14 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -5876,14 +5876,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_f
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -6048,14 +6048,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
 ; GFX11:       ; %bb.0:
@@ -6203,14 +6203,14 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -6403,14 +6403,14 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory(pt
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -6595,14 +6595,14 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memo
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -6765,14 +6765,14 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_mem
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f32 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f32 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -6938,14 +6938,14 @@ define double @global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7159,14 +7159,14 @@ define double @global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7381,14 +7381,14 @@ define double @global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7609,14 +7609,14 @@ define void @global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[2:3], off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[2:3], off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -7812,14 +7812,14 @@ define void @global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[2:3], off offset:2040
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[2:3], off offset:2040
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8018,14 +8018,14 @@ define void @global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[2:3], off offset:-2048
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_add_f64 v[0:1], v[2:3], off offset:-2048
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8252,38 +8252,38 @@ define half @global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    global_load_dword v4, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB44_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB44_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    global_load_dword v4, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB44_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB44_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8592,40 +8592,40 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gra
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    global_load_dword v4, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB45_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB45_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    global_load_dword v4, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB45_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB45_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -8943,41 +8943,41 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_gra
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    global_load_dword v4, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB46_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB46_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    global_load_dword v4, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB46_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB46_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9291,37 +9291,37 @@ define void @global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB47_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB47_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB47_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB47_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9619,39 +9619,39 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB48_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB48_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB48_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB48_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -9957,40 +9957,40 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB49_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB49_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB49_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB49_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10285,30 +10285,30 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB50_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_add_f16_e32 v3, v5, v2
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s2, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB50_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB50_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_add_f16_e32 v3, v5, v2
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s2, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB50_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10544,29 +10544,29 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_n
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    s_mov_b32 s2, 0xffff0000
-; GFX940-NEXT:  .LBB51_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_add_f16_e32 v3, v5, v2
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s2, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB51_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    s_mov_b32 s2, 0xffff0000
+; GFX942-NEXT:  .LBB51_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_add_f16_e32 v3, v5, v2
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s2, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB51_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -10811,40 +10811,40 @@ define half @global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v6
-; GFX940-NEXT:    v_mov_b32_e32 v1, v7
-; GFX940-NEXT:    global_load_dword v4, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v6
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB52_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v4
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v5, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB52_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v6
+; GFX942-NEXT:    v_mov_b32_e32 v1, v7
+; GFX942-NEXT:    global_load_dword v4, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v6
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v4
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v7
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v5, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB52_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v4
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11163,39 +11163,39 @@ define void @global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:  .LBB53_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
-; GFX940-NEXT:    v_add_f16_e32 v4, v4, v2
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB53_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:  .LBB53_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_e32 v4, v3, v5
+; GFX942-NEXT:    v_add_f16_e32 v4, v4, v2
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, v3, v4
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB53_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11516,47 +11516,47 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory(
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB54_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB54_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB54_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB54_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -11911,49 +11911,49 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB55_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB55_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB55_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB55_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12318,50 +12318,50 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB56_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB56_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB56_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB56_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -12721,46 +12721,46 @@ define void @global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory(
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v3, v0
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v3
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v6, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB57_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v4, v4, v2
-; GFX940-NEXT:    v_bfe_u32 v7, v4, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v4
-; GFX940-NEXT:    v_add3_u32 v7, v7, v4, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v4, v5, v6, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v5, v4
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB57_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v3, v0
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v3
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v6, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB57_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v4, v4, v2
+; GFX942-NEXT:    v_bfe_u32 v7, v4, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v4
+; GFX942-NEXT:    v_add3_u32 v7, v7, v4, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v4, v5, v6, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v5, v4
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB57_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13104,48 +13104,48 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB58_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB58_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB58_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB58_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13498,49 +13498,49 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_movk_i32 s0, 0xf800
-; GFX940-NEXT:    s_mov_b32 s1, -1
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB59_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB59_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0xf800
+; GFX942-NEXT:    s_mov_b32 s1, -1
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB59_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB59_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -13882,40 +13882,40 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_
 ; GFX12-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB60_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
-; GFX940-NEXT:    v_add_f32_e32 v3, v3, v2
-; GFX940-NEXT:    v_bfe_u32 v4, v3, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v3
-; GFX940-NEXT:    v_add3_u32 v4, v4, v3, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX940-NEXT:    v_and_or_b32 v4, v5, s3, v3
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB60_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB60_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; GFX942-NEXT:    v_add_f32_e32 v3, v3, v2
+; GFX942-NEXT:    v_bfe_u32 v4, v3, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v3
+; GFX942-NEXT:    v_add3_u32 v4, v4, v3, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v3, v4, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX942-NEXT:    v_and_or_b32 v4, v5, s3, v3
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB60_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14204,39 +14204,39 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2046
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:    s_mov_b32 s3, 0xffff0000
-; GFX940-NEXT:  .LBB61_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
-; GFX940-NEXT:    v_add_f32_e32 v2, v2, v4
-; GFX940-NEXT:    v_bfe_u32 v5, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v6, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v5, v5, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    v_and_or_b32 v2, v3, s3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB61_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2046
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:    s_mov_b32 s3, 0xffff0000
+; GFX942-NEXT:  .LBB61_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX942-NEXT:    v_add_f32_e32 v2, v2, v4
+; GFX942-NEXT:    v_bfe_u32 v5, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v6, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v5, v5, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX942-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    v_and_or_b32 v2, v3, s3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB61_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14534,49 +14534,49 @@ define bfloat @global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine
 ; GFX12-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v5, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v3, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
-; GFX940-NEXT:    v_not_b32_e32 v4, v4
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB62_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v7, v5
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v5, v5, v2
-; GFX940-NEXT:    v_bfe_u32 v6, v5, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v5
-; GFX940-NEXT:    v_add3_u32 v6, v6, v5, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v6, v7, v4, v5
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB62_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v5, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v3, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v4, v3, s0
+; GFX942-NEXT:    v_not_b32_e32 v4, v4
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB62_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v7, v5
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v5, v5, v2
+; GFX942-NEXT:    v_bfe_u32 v6, v5, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v5
+; GFX942-NEXT:    v_add3_u32 v6, v6, v5, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v6, v7, v4, v5
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB62_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_lshrrev_b32_e32 v0, v3, v5
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -14942,48 +14942,48 @@ define void @global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0x7fe
-; GFX940-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
-; GFX940-NEXT:    v_and_b32_e32 v0, -4, v4
-; GFX940-NEXT:    v_mov_b32_e32 v1, v5
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    v_and_b32_e32 v4, 3, v4
-; GFX940-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
-; GFX940-NEXT:    s_mov_b32 s0, 0xffff
-; GFX940-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
-; GFX940-NEXT:    v_not_b32_e32 v5, v5
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
-; GFX940-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX940-NEXT:  .LBB63_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX940-NEXT:    s_nop 0
-; GFX940-NEXT:    v_add_f32_e32 v2, v2, v6
-; GFX940-NEXT:    v_bfe_u32 v7, v2, 16, 1
-; GFX940-NEXT:    v_or_b32_e32 v8, 0x400000, v2
-; GFX940-NEXT:    v_add3_u32 v7, v7, v2, s2
-; GFX940-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX940-NEXT:    s_nop 1
-; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
-; GFX940-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX940-NEXT:    v_and_or_b32 v2, v3, v5, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB63_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0x7fe
+; GFX942-NEXT:    v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
+; GFX942-NEXT:    v_and_b32_e32 v0, -4, v4
+; GFX942-NEXT:    v_mov_b32_e32 v1, v5
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    v_and_b32_e32 v4, 3, v4
+; GFX942-NEXT:    v_lshlrev_b32_e32 v4, 3, v4
+; GFX942-NEXT:    s_mov_b32 s0, 0xffff
+; GFX942-NEXT:    v_lshlrev_b32_e64 v5, v4, s0
+; GFX942-NEXT:    v_not_b32_e32 v5, v5
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 16, v2
+; GFX942-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX942-NEXT:  .LBB63_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_f32_e32 v2, v2, v6
+; GFX942-NEXT:    v_bfe_u32 v7, v2, 16, 1
+; GFX942-NEXT:    v_or_b32_e32 v8, 0x400000, v2
+; GFX942-NEXT:    v_add3_u32 v7, v7, v2, s2
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX942-NEXT:    s_nop 1
+; GFX942-NEXT:    v_cndmask_b32_e32 v2, v7, v8, vcc
+; GFX942-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX942-NEXT:    v_and_or_b32 v2, v3, v5, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB63_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15301,14 +15301,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_me
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15533,14 +15533,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -15767,14 +15767,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16005,14 +16005,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16215,14 +16215,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16428,14 +16428,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:-2048
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:-2048
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16650,14 +16650,14 @@ define <2 x half> @global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -16887,14 +16887,14 @@ define void @global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -17102,14 +17102,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory(p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -17348,14 +17348,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr a
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -17584,14 +17584,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_me
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -17816,14 +17816,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -18026,14 +18026,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__maybe_remote(ptr addrspac
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__maybe_remote:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__maybe_remote:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__maybe_remote:
 ; GFX11:       ; %bb.0:
@@ -18272,14 +18272,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__maybe_remote(ptr addrspace(1)
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__maybe_remote:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__maybe_remote:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__maybe_remote:
 ; GFX11:       ; %bb.0:
@@ -18512,14 +18512,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -18840,14 +18840,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -19170,14 +19170,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -19504,14 +19504,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memor
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -19822,14 +19822,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -20143,14 +20143,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fin
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off offset:-2048
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off offset:-2048
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -20473,14 +20473,14 @@ define <2 x bfloat> @global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -20806,14 +20806,14 @@ define void @global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -21129,14 +21129,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memor
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -21457,14 +21457,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory(ptr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -21775,14 +21775,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -22103,14 +22103,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memor
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -22421,14 +22421,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__maybe_remote(ptr addrs
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__maybe_remote:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__maybe_remote:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__maybe_remote:
 ; GFX11:       ; %bb.0:
@@ -22749,14 +22749,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__maybe_remote(ptr addrspace(1
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__maybe_remote:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__maybe_remote:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_pk_add_bf16 v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__maybe_remote:
 ; GFX11:       ; %bb.0:
@@ -23079,25 +23079,25 @@ define amdgpu_kernel void @infer_as_before_atomic(ptr addrspace(4) %arg) #1 {
 ; GFX12-NEXT:  .LBB92_2:
 ; GFX12-NEXT:    s_endpgm
 ;
-; GFX940-LABEL: infer_as_before_atomic:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_mov_b64 s[0:1], exec
-; GFX940-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
-; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX940-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB92_2
-; GFX940-NEXT:  ; %bb.1:
-; GFX940-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
-; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-NEXT:    v_cvt_f32_ubyte0_e32 v1, s0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    global_atomic_add_f32 v0, v1, s[2:3]
-; GFX940-NEXT:  .LBB92_2:
-; GFX940-NEXT:    s_endpgm
+; GFX942-LABEL: infer_as_before_atomic:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_mov_b64 s[0:1], exec
+; GFX942-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX942-NEXT:    v_mbcnt_hi_u32_b32 v0, s1, v0
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX942-NEXT:    s_cbranch_execz .LBB92_2
+; GFX942-NEXT:  ; %bb.1:
+; GFX942-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
+; GFX942-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942-NEXT:    v_cvt_f32_ubyte0_e32 v1, s0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942-NEXT:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX942-NEXT:  .LBB92_2:
+; GFX942-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: infer_as_before_atomic:
 ; GFX11:       ; %bb.0:
diff --git llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll
index cd6ed1e6b98c..e8d73914ad30 100644
--- llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll
+++ llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
@@ -27,30 +27,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB0_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB0_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB0_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB0_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -188,30 +188,30 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB1_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB1_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB1_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB1_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -351,30 +351,30 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_gr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:-2048
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB2_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB2_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:-2048
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB2_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -514,29 +514,29 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB3_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB3_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB3_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB3_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -669,29 +669,29 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB4_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB4_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -827,29 +827,29 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:-2048
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB5_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB5_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:-2048
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB5_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB5_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -986,30 +986,30 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB6_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB6_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB6_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB6_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1152,29 +1152,29 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB7_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB7_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB7_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB7_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1312,30 +1312,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory(ptr addr
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB8_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB8_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB8_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1542,30 +1542,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__a
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB9_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB9_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB9_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB9_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
 ; GFX11:       ; %bb.0:
@@ -1707,30 +1707,30 @@ define float @global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memo
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB10_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB10_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB10_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB10_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -1868,30 +1868,30 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB11_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB11_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB11_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB11_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2031,30 +2031,30 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fi
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:-2048
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB12_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:-2048
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB12_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB12_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2194,29 +2194,29 @@ define void @global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_mem
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB13_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB13_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2349,29 +2349,29 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_f
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB14_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB14_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB14_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB14_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2507,29 +2507,29 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_f
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:-2048
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB15_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB15_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:-2048
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB15_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB15_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2666,30 +2666,30 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_f
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX940-NEXT:  .LBB16_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v5, v3
-; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
-; GFX940-NEXT:    v_max_f32_e32 v4, v3, v2
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB16_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v0, v3
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX942-NEXT:  .LBB16_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v5, v3
+; GFX942-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX942-NEXT:    v_max_f32_e32 v4, v3, v2
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB16_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v0, v3
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -2832,29 +2832,29 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    global_load_dword v3, v[0:1], off offset:2044
-; GFX940-NEXT:    s_mov_b64 s[0:1], 0
-; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
-; GFX940-NEXT:  .LBB17_1: ; %atomicrmw.start
-; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX940-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX940-NEXT:    buffer_wbl2 sc0 sc1
-; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc0 sc1
-; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
-; GFX940-NEXT:    v_mov_b32_e32 v3, v2
-; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execnz .LBB17_1
-; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    global_load_dword v3, v[0:1], off offset:2044
+; GFX942-NEXT:    s_mov_b64 s[0:1], 0
+; GFX942-NEXT:    v_max_f32_e32 v4, v2, v2
+; GFX942-NEXT:  .LBB17_1: ; %atomicrmw.start
+; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX942-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX942-NEXT:    buffer_wbl2 sc0 sc1
+; GFX942-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc0 sc1
+; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX942-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_cbranch_execnz .LBB17_1
+; GFX942-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX942-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3015,14 +3015,14 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3187,14 +3187,14 @@ define double @global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_g
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    buffer_inv sc1
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
 ; GFX11:       ; %bb.0:
@@ -3360,14 +3360,14 @@ define double @global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_g
 ; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
-; GFX940:       ; %bb.0:
-; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    buffer_wbl2 sc1
-; GFX940-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    buffer_inv sc1
-; GFX940-NEXT:    s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    buffer_wbl2 sc1
+; GFX942-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
########## TRUNCATED ###########