Skip to content

Commit 33016db

Browse files
[TritonIntelGPUPipeline] Remove supportRegularPtr option
Signed-off-by: Whitney Tsang <[email protected]>
1 parent 443b2de commit 33016db

File tree

8 files changed

+12
-23
lines changed

8 files changed

+12
-23
lines changed

test/TritonIntelGPU/loop-pipeline.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: triton-opt %s -split-input-file -tritonintelgpu-pipeline="num-stages=3 support-regular-ptr=true" | FileCheck %s
1+
// RUN: triton-opt %s -split-input-file -tritonintelgpu-pipeline="num-stages=3" | FileCheck %s
22

33
// CHECK: #[[$BLOCK_0:.+]] = #ttg.blocked<{sizePerThread = [1, 1], threadsPerWarp = [1, 16], warpsPerCTA = [2, 2], order = [1, 0]}>
44
// CHECK: #[[$BLOCK_1:.+]] = #ttg.blocked<{sizePerThread = [1, 1], threadsPerWarp = [1, 16], warpsPerCTA = [1, 4], order = [1, 0]}>

test/TritonIntelGPU/split-barrier.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: triton-opt %s -split-input-file -tritonintelgpu-pipeline="num-stages=3 support-regular-ptr=true split-barriers-scope=workgroup" | FileCheck %s --check-prefixes=CHECK,WORKGROUP_SCOPE
2-
// RUN: triton-opt %s -split-input-file -tritonintelgpu-pipeline="num-stages=3 support-regular-ptr=true split-barriers-scope=subgroup" | FileCheck %s --check-prefixes=CHECK,SUBGROUP_SCOPE
1+
// RUN: triton-opt %s -split-input-file -tritonintelgpu-pipeline="num-stages=3 split-barriers-scope=workgroup" | FileCheck %s --check-prefixes=CHECK,WORKGROUP_SCOPE
2+
// RUN: triton-opt %s -split-input-file -tritonintelgpu-pipeline="num-stages=3 split-barriers-scope=subgroup" | FileCheck %s --check-prefixes=CHECK,SUBGROUP_SCOPE
33

44
// CHECK: #[[$BLOCK:.+]] = #ttg.blocked<{sizePerThread = [1, 4], threadsPerWarp = [1, 16], warpsPerCTA = [8, 4], order = [1, 0]}>
55
// CHECK: #[[$DPAS:.+]] = #triton_intel_gpu.dpas<{repeatCount = 8, systolicDepth = 8, executionSize = 16, opsPerChan = 2, threadsPerWarp = 16, warpsPerCTA = [4, 8], repCluster = [1, 1], A = [8, 16], B = [16, 16], C = [8, 16]}>

third_party/intel/backend/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def make_ttgir(mod, metadata, opt, properties):
311311
intel.passes.ttgpuir.add_remove_layout_conversions(pm)
312312
intel.passes.ttgpuir.add_materialize_block_pointer(pm)
313313
intel.passes.ttgpuir.add_remove_layout_conversions(pm)
314-
intel.passes.ttgpuir.add_pipeline(pm, opt.num_stages, True, XPUBackend.get_split_barrier_scope(opt))
314+
intel.passes.ttgpuir.add_pipeline(pm, opt.num_stages, XPUBackend.get_split_barrier_scope(opt))
315315

316316
passes.ttgpuir.add_fuse_nested_loops(pm)
317317
passes.ttgpuir.add_optimize_thread_locality(pm)

third_party/intel/include/Dialect/TritonIntelGPU/Transforms/Passes.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,6 @@ def TritonIntelGPUPipeline : Pass<"tritonintelgpu-pipeline", "mlir::ModuleOp"> {
122122
Option<"numStages", "num-stages",
123123
"int32_t", /*default*/"3",
124124
"number of pipeline stages">,
125-
Option<"supportRegularPtr", "support-regular-ptr",
126-
"bool", /*default*/"false",
127-
"Enable support for prefetching non-block pointers">,
128125
Option<"splitBarrierScope", "split-barriers-scope",
129126
"enum SplitBarrierScope", "SplitBarrierScope::None",
130127
"insert split barriers in a loop",

third_party/intel/lib/TritonIntelGPUTransforms/Pipeliner/MatmulLoopPipeline.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,7 @@ static std::optional<LoadDotOperand> loadDotOperand(tt::LoadOp loadOp) {
117117

118118
/// Collect the loads to pipeline into `loadOps`. The list is left empty when no load in the loop qualifies.
119119
static void collectOpsToPipeline(scf::ForOp forOp,
120-
SmallVectorImpl<LoadDotOperand> &loadOps,
121-
bool supportRegularPtr) {
120+
SmallVectorImpl<LoadDotOperand> &loadOps) {
122121
assert(loadOps.empty() && "Expecting an empty list of load operations");
123122

124123
ModuleOp moduleOp = forOp->getParentOfType<ModuleOp>();
@@ -128,11 +127,6 @@ static void collectOpsToPipeline(scf::ForOp forOp,
128127
// operations in the loop body block.
129128
for (Operation &op : forOp) {
130129
if (auto loadOp = dyn_cast<tt::LoadOp>(&op)) {
131-
Value ptr = loadOp.getPtr();
132-
bool isBlockPtr = mlir::triton::isTensorPointerType(ptr.getType());
133-
if (!isBlockPtr && !supportRegularPtr)
134-
continue;
135-
136130
// Check if the memory is structured densely. If not, we do not prefetch it
137131
// to avoid polluting the cache.
138132
Attribute blockIOAttr =
@@ -303,12 +297,11 @@ createSchedule(scf::ForOp forOp, int numStages) {
303297
}
304298

305299
bool ttgi::preProcessLoopAndGetSchedule(scf::ForOp &forOp, int numStages,
306-
bool supportRegularPtr,
307300
mlir::scf::PipeliningOption &options) {
308301
// 1. First collect "interesting" operations with a stage where to schedule
309302
// them. This gives a coarse scheduling for the loop.
310303
SmallVector<LoadDotOperand> loads;
311-
collectOpsToPipeline(forOp, loads, supportRegularPtr);
304+
collectOpsToPipeline(forOp, loads);
312305
if (loads.empty()) {
313306
LDBG("No loads to pipeline");
314307
return false;

third_party/intel/lib/TritonIntelGPUTransforms/Pipeliner/Schedule.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
namespace mlir::triton::gpu::intel {
77

88
bool preProcessLoopAndGetSchedule(scf::ForOp &forOp, int numStages,
9-
bool supportRegularPtr,
109
mlir::scf::PipeliningOption &options);
1110

1211
} // namespace mlir::triton::gpu::intel

third_party/intel/lib/TritonIntelGPUTransforms/Pipeliner/SoftwarePipeliner.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,14 @@ static bool preCondition(scf::ForOp forOp) {
3939
}
4040

4141
static void
42-
pipelineLoop(scf::ForOp forOp, int numStages, bool supportRegularPtr,
42+
pipelineLoop(scf::ForOp forOp, int numStages,
4343
std::optional<spirv::Scope> barrierScope = std::nullopt) {
4444
mlir::scf::PipeliningOption options;
4545
if (!preCondition(forOp))
4646
return;
4747

48-
bool foundSchedule = ttgi::preProcessLoopAndGetSchedule(
49-
forOp, numStages, supportRegularPtr, options);
48+
bool foundSchedule =
49+
ttgi::preProcessLoopAndGetSchedule(forOp, numStages, options);
5050
if (!foundSchedule)
5151
return;
5252

@@ -108,7 +108,7 @@ struct IntelGPUPipelinePass
108108
getOperation()->walk([&](scf::ForOp forOp) { loops.push_back(forOp); });
109109

110110
for (scf::ForOp forOp : loops) {
111-
pipelineLoop(forOp, numStages, supportRegularPtr, barrierScope);
111+
pipelineLoop(forOp, numStages, barrierScope);
112112
}
113113
}
114114
};

third_party/intel/triton_xpu.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ void init_triton_intel_passes_ttgpuir(py::module &&m) {
8888
gpu::intel::createTritonIntelGPUAccelerateMatmul);
8989
ADD_PASS_WRAPPER_0("add_rewrite_stack_ptr",
9090
gpu::intel::createTritonIntelGPURewriteStackPtr);
91-
ADD_PASS_WRAPPER_OPT_3("add_pipeline",
92-
gpu::intel::createTritonIntelGPUPipeline, int, bool,
91+
ADD_PASS_WRAPPER_OPT_2("add_pipeline",
92+
gpu::intel::createTritonIntelGPUPipeline, int,
9393
enum gpu::intel::SplitBarrierScope);
9494
ADD_PASS_WRAPPER_0("add_remove_layout_conversions",
9595
gpu::intel::createTritonIntelGPURemoveLayoutConversions);

0 commit comments

Comments
 (0)