Skip to content

AMDGPU: Fix atomic expand tests accidentally underaligning #147299

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Jul 7, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Jul 7, 2025

@arsenm arsenm marked this pull request as ready for review July 7, 2025 13:40
@llvmbot
Copy link
Member

llvmbot commented Jul 7, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 448.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147299.diff

2 Files Affected:

  • (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll (+1942-750)
  • (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll (+1586-715)
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
index 1c2ae608711cc..668e7ba9b1f0f 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
@@ -787,419 +787,1863 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memo
 }
 
 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT:    [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT:    [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT:    [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
-; COMMON:       atomicrmw.start:
-; COMMON-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT:    [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT:    store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT:    store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT:    [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT:    [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT:    [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT:    [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON:       atomicrmw.end:
-; COMMON-NEXT:    ret double [[NEWLOADED]]
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX803:       atomicrmw.start:
+; GFX803-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803:       atomicrmw.end:
+; GFX803-NEXT:    ret double [[TMP5]]
+;
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX906:       atomicrmw.start:
+; GFX906-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX906-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906:       atomicrmw.end:
+; GFX906-NEXT:    ret double [[TMP5]]
+;
+; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX908-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX908-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX908:       atomicrmw.start:
+; GFX908-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX908-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX908-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX908-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX908-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX908-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX908-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX908-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX908-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX908:       atomicrmw.end:
+; GFX908-NEXT:    ret double [[TMP5]]
+;
+; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX90A-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX90A-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX90A:       atomicrmw.start:
+; GFX90A-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX90A-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX90A-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX90A-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX90A-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX90A-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX90A-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX90A-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX90A-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX90A:       atomicrmw.end:
+; GFX90A-NEXT:    ret double [[TMP5]]
+;
+; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX942-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode [[META0]]
+; GFX942-NEXT:    ret double [[RES]]
+;
+; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX10-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX10-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX10:       atomicrmw.start:
+; GFX10-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX10-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX10-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX10-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX10-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX10-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX10-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX10-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX10-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX10:       atomicrmw.end:
+; GFX10-NEXT:    ret double [[TMP5]]
+;
+; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX11-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX11-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX11:       atomicrmw.start:
+; GFX11-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX11-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX11-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX11-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX11-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX11-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX11-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX11-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX11-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX11:       atomicrmw.end:
+; GFX11-NEXT:    ret double [[TMP5]]
+;
+; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
+; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX12-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX12-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX12:       atomicrmw.start:
+; GFX12-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX12-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX12-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX12-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX12-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
+; GFX12-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX12-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX12-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX12-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX12:       atomicrmw.end:
+; GFX12-NEXT:    ret double [[TMP5]]
 ;
-  %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
+  %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0
   ret double %res
 }
 
 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT:    [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT:    [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT:    [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
-; COMMON:       atomicrmw.start:
-; COMMON-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT:    [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT:    store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT:    store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT:    [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT:    [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT:    [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT:    [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON:       atomicrmw.end:
-; COMMON-NEXT:    ret double [[NEWLOADED]]
+; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX803-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX803:       atomicrmw.start:
+; GFX803-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX803-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX803-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX803-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX803-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX803-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX803:       atomicrmw.end:
+; GFX803-NEXT:    ret double [[TMP5]]
 ;
-  %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
-  ret double %res
-}
-
-define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT:    [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT:    [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT:    [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
-; COMMON:       atomicrmw.start:
-; COMMON-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT:    [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT:    store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT:    store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT:    [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT:    [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT:    [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
-; COMMON-NEXT:    [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
-; COMMON-NEXT:    [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
-; COMMON-NEXT:    [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
-; COMMON-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; COMMON:       atomicrmw.end:
-; COMMON-NEXT:    ret double [[NEWLOADED]]
+; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
+; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT:    [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
+; GFX906-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; GFX906:       atomicrmw.start:
+; GFX906-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
+; GFX906-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
+; GFX906-NEXT:    [[TMP2:%.*]] = bitcast double [[NEW]] to i64
+; GFX906-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
+; GFX906-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
+; GFX906-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
+; GFX906-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
+; GFX906-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; GFX906-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; GFX906:       atomicrmw.end:
+; GFX906-NEXT:    ret double [[TMP5]]
 ;
-  %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
-  ret double %res
-}
-
-define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
-; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
-; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
-; COMMON-NEXT:    [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT:    [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
-; COMMON-NEXT:    [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
-; COMMON-NEXT:    br label [[ATOMICRMW_START:%.*]]
-; COMMON:       atomicrmw.start:
-; COMMON-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; COMMON-NEXT:    [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
-; COMMON-NEXT:    [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; COMMON-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT:    store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT:    store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
-; COMMON-NEXT:    [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
-; COMMON-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
-; COMMON-NEXT:    [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
-; COMMON-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
-; COMMON-NEXT:    [[TMP7:%.*]] = insertvalue { double, i1 } ...
[truncated]

Copy link
Contributor Author

arsenm commented Jul 8, 2025

Merge activity

  • Jul 8, 2:57 PM UTC: A user started a stack merge that includes this pull request via Graphite.
  • Jul 8, 2:58 PM UTC: @arsenm merged this pull request with Graphite.

@arsenm arsenm merged commit 67076dd into main Jul 8, 2025
14 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/fix-tests-accidentally-using-unaligned-atomics branch July 8, 2025 14:58
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants