From e97be30d952c3b1d315208b8bd5e4082148dda0e Mon Sep 17 00:00:00 2001
From: Xiang Gao
Date: Wed, 29 Jan 2025 14:44:16 -0800
Subject: [PATCH] cleanup

---
 csrc/codegen.cpp                      |  4 +-
 csrc/device_lower/pass/allocation.cpp |  4 +-
 csrc/kernel_ir.cpp                    |  2 -
 csrc/kernel_ir.h                      | 75 +++------------------------
 runtime/tensor_memory.cu              | 17 +----
 5 files changed, 11 insertions(+), 91 deletions(-)

diff --git a/csrc/codegen.cpp b/csrc/codegen.cpp
index fc0514708f1..51d1722c5d1 100644
--- a/csrc/codegen.cpp
+++ b/csrc/codegen.cpp
@@ -3193,9 +3193,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
       }
       case MemoryType::Tensor: {
         indent() << "TMemTensor " << genVariableName(tv) << "("
-                 << genInline(alloc->baseAddress()) << ", "
-                 << genInline(alloc->laneOffset()) << ", "
-                 << genInline(alloc->colOffset()) << ");\n";
+                 << genInline(alloc->address()) << ");\n";
         break;
       }
       default:
diff --git a/csrc/device_lower/pass/allocation.cpp b/csrc/device_lower/pass/allocation.cpp
index 7c2924a3366..0cf9097a971 100644
--- a/csrc/device_lower/pass/allocation.cpp
+++ b/csrc/device_lower/pass/allocation.cpp
@@ -483,9 +483,7 @@ class AllocationInserter : public kir::ExprMutator {
           GpuLower::current()->tmemInfo().allocation_address;
       auto address_ti = IrBuilder::create<kir::TensorIndex>(
           allocation_address, allocation_address->fusion()->zeroVal());
-      alloc_expr->setBaseAddress(address_ti);
-      alloc_expr->setLaneOffset(allocation_address->fusion()->zeroVal());
-      alloc_expr->setColOffset(allocation_address->fusion()->zeroVal());
+      alloc_expr->setAddress(address_ti);
     }
 
     return alloc_expr;
diff --git a/csrc/kernel_ir.cpp b/csrc/kernel_ir.cpp
index c5b7964e7df..d21e83f32bd 100644
--- a/csrc/kernel_ir.cpp
+++ b/csrc/kernel_ir.cpp
@@ -187,8 +187,6 @@ Allocate::Allocate(
   addAttribute(alias);
   // Always initialize smem/tmem addresses to nullptr
   addAttribute(nullptr);
-  addAttribute(nullptr);
-  addAttribute(nullptr);
 
   for (auto s : shape) {
     addAttribute(s);
diff --git a/csrc/kernel_ir.h b/csrc/kernel_ir.h
index 3f7027982e3..5dce505b2b6 100644
--- a/csrc/kernel_ir.h
+++ b/csrc/kernel_ir.h
@@ -309,12 +309,9 @@ class Allocate final : public Expr {
   //! Size of each dimension
   std::vector<Val*> shape() const {
-    constexpr int64_t num_attributes_before_shape = 8;
     std::vector<Val*> result;
-    result.reserve(attributes().size() - num_attributes_before_shape);
-    for (auto i = attributes().begin() + num_attributes_before_shape;
-         i != attributes().end();
-         ++i) {
+    result.reserve(attributes().size() - 6);
+    for (auto i = attributes().begin() + 6; i != attributes().end(); ++i) {
       result.emplace_back((*i)->as<Val>());
     }
     return result;
   }
@@ -368,8 +365,9 @@ class Allocate final : public Expr {
   // aligned address in bytes.
   void setAddress(Val* addr) {
     NVF_CHECK(
-        memoryType() == MemoryType::Shared,
-        "Allocation address may only be set for shared memory allocations. Memory type is ",
+        memoryType() == MemoryType::Shared ||
+            memoryType() == MemoryType::Tensor,
+        "Allocation address may only be set for shared/tensor memory allocations. Memory type is ",
         memoryType());
     NVF_CHECK(
         address() == nullptr,
         "Attempted to set address twice for allocation ",
         toString());
     attributes_[5] = addr;
   }
 
-  void setBaseAddress(Val* addr) {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Allocation base address may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    NVF_CHECK(
-        baseAddress() == nullptr,
-        "Attempted to set base address twice for allocation ",
-        toString());
-    attributes_[5] = addr;
-  }
-
-  void setLaneOffset(Val* lane_offset) {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Lane offset may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    NVF_CHECK(
-        laneOffset() == nullptr,
-        "Attempted to set lane offset twice for allocation ",
-        toString());
-    attributes_[6] = lane_offset;
-  }
-
-  void setColOffset(Val* col_offset) {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Column offset may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    NVF_CHECK(
-        colOffset() == nullptr,
-        "Attempted to set column offset twice for allocation ",
-        toString());
-    attributes_[7] = col_offset;
-  }
-
   // This is an integer scalar describing the byte address within the dynamic
   // shared memory array for a shared memory allocation. For memory types other
   // than Shared, or before allocation, this function might return nullptr.
   Val* address() const {
     NVF_CHECK(
-        memoryType() == MemoryType::Shared,
+        memoryType() == MemoryType::Shared ||
+            memoryType() == MemoryType::Tensor,
         "Allocation address may only be set for shared memory allocations. Memory type is ",
         memoryType());
     return attributeVal(5);
   }
-
-  Val* baseAddress() const {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Base address may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    return attributeVal(5);
-  }
-
-  Val* laneOffset() const {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Lane offset may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    return attributeVal(6);
-  }
-
-  Val* colOffset() const {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Column offset may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    return attributeVal(7);
-  }
 };
 
 // Allocate tensor memory tcgen05.alloc
diff --git a/runtime/tensor_memory.cu b/runtime/tensor_memory.cu
index 6e84244d12a..1c3fca0aae5 100644
--- a/runtime/tensor_memory.cu
+++ b/runtime/tensor_memory.cu
@@ -10,10 +10,8 @@
 // manipulate tensor memory addresses. Example usage:
 //   TMemTensor T0(0x12345678):
 //     -> address (lane=0x1234, col=0x5678):
-//   TMemTensor T1(0x12345678, 32, 32):
-//     -> address (lane=0x1234+32, col=0x5678+32)
-//   TMemTensor T2 = T1 + {64, 64}:
-//     -> address (lane=T1.lane+64, col=T1.col+64)
+//   TMemTensor T1 = T0 + {64, 64}:
+//     -> address (lane=T0.lane+64, col=T0.col+64)
 struct TMemTensor {
   uint32_t raw_address;
 
@@ -31,9 +29,6 @@ struct TMemTensor {
  public:
   TMemTensor(uint32_t raw_address) : raw_address(raw_address) {}
 
-  TMemTensor(uint32_t base_address, uint16_t lane_offset, uint16_t col_offset)
-      : raw_address(add(base_address, lane_offset, col_offset)) {}
-
   operator uint32_t() const {
     return raw_address;
   }
@@ -41,14 +36,6 @@ struct TMemTensor {
   uint32_t operator+(Array<uint16_t, 2, 2> offset) const {
     return add(raw_address, offset[0], offset[1]);
   }
-
-  uint16_t lane() const {
-    return raw_address >> 16;
-  }
-
-  uint16_t col() const {
-    return raw_address & 0xFFFF;
-  }
 };
 
 static_assert(