; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: %if ptxas-12.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}

; Codegen test for the llvm.nvvm.prefetch.* / llvm.nvvm.prefetchu.* intrinsics.
; The first RUN line matches llc's PTX output against the CHECK-PTX64 lines;
; the second feeds the same output to ptxas, but only when the ptxas-12.3
; lit feature is available.

target triple = "nvptx64-nvidia-cuda"
6+
; Prefetch intrinsics taking an addrspace(5) pointer (.local variants).
declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)

; Prefetch intrinsics taking an addrspace(1) pointer (.global variants).
; These are the only ones declared with an .evictlast form as well.
declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)

; Prefetch intrinsics taking an unqualified `ptr` (default address space).
declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)

; prefetchu intrinsic; only an L1 form is declared in this test.
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
19+
; Calls the L1 and L2 .local prefetch intrinsics on one addrspace(5) pointer
; and expects one PTX prefetch.local instruction per call, in call order, both
; addressing the register loaded from parameter 0.
; NOTE(review): the PTX ISA appears to list eviction priorities only as
; `.L2::evict_last` / `.L2::evict_normal` on `prefetch.global`; confirm that
; ptxas accepts the `.evictnormal` suffix expected below.
define void @prefetch_local(ptr addrspace(5) %local_ptr) {
; CHECK-PTX64-LABEL: prefetch_local(
; CHECK-PTX64:       {
; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT:  // %bb.0:
; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch_local_param_0];
; CHECK-PTX64-NEXT:    prefetch.local.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    prefetch.local.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    ret;
  tail call void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
  tail call void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
  ret void
}
34+
; Calls all four .global prefetch intrinsics (L1/L2 x evictnormal/evictlast)
; on one addrspace(1) pointer and expects four PTX prefetch.global
; instructions in call order, all addressing the register loaded from
; parameter 0.
; NOTE(review): the PTX ISA appears to spell these priorities
; `.L2::evict_normal` / `.L2::evict_last` and to offer them for L2 only;
; confirm that ptxas accepts the `.L1.evictlast` / `.evictnormal` forms
; expected below.
define void @prefetch_global(ptr addrspace(1) %global_ptr) {
; CHECK-PTX64-LABEL: prefetch_global(
; CHECK-PTX64:       {
; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT:  // %bb.0:
; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch_global_param_0];
; CHECK-PTX64-NEXT:    prefetch.global.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    prefetch.global.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    prefetch.global.L1.evictlast [%rd1];
; CHECK-PTX64-NEXT:    prefetch.global.L2.evictlast [%rd1];
; CHECK-PTX64-NEXT:    ret;
  tail call void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
  tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
  tail call void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
  tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
  ret void
}
53+
54+
; Calls the L1 and L2 prefetch intrinsics that take an unqualified `ptr` and
; expects PTX prefetch instructions with no state-space qualifier, in call
; order, both addressing the register loaded from parameter 0.
; NOTE(review): the PTX ISA does not appear to list an eviction-priority
; suffix for this form; confirm that ptxas accepts `.evictnormal` here.
define void @prefetch_(ptr %ptr) {
; CHECK-PTX64-LABEL: prefetch_(
; CHECK-PTX64:       {
; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT:  // %bb.0:
; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch__param_0];
; CHECK-PTX64-NEXT:    prefetch.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    prefetch.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    ret;
  tail call void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
  tail call void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
  ret void
}
69+
; Calls the single prefetchu intrinsic on an unqualified `ptr` and expects one
; PTX prefetchu.L1 instruction addressing the register loaded from parameter 0.
; NOTE(review): the PTX ISA appears to document this instruction as plain
; `prefetchu.L1 [a]`; confirm that ptxas accepts the `.evictnormal` suffix.
define void @prefetchu_l1(ptr %ptr) {
; CHECK-PTX64-LABEL: prefetchu_l1(
; CHECK-PTX64:       {
; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT:  // %bb.0:
; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetchu_l1_param_0];
; CHECK-PTX64-NEXT:    prefetchu.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    ret;
  tail call void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
  ret void
}
0 commit comments