From e97be30d952c3b1d315208b8bd5e4082148dda0e Mon Sep 17 00:00:00 2001
From: Xiang Gao
Date: Wed, 29 Jan 2025 14:44:16 -0800
Subject: [PATCH] cleanup

---
 csrc/codegen.cpp                      |  4 +-
 csrc/device_lower/pass/allocation.cpp |  4 +-
 csrc/kernel_ir.cpp                    |  2 -
 csrc/kernel_ir.h                      | 75 +++------------------------
 runtime/tensor_memory.cu              | 17 +----
 5 files changed, 11 insertions(+), 91 deletions(-)

diff --git a/csrc/codegen.cpp b/csrc/codegen.cpp
index fc0514708f1..51d1722c5d1 100644
--- a/csrc/codegen.cpp
+++ b/csrc/codegen.cpp
@@ -3193,9 +3193,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
       }
       case MemoryType::Tensor: {
         indent() << "TMemTensor " << genVariableName(tv) << "("
-                 << genInline(alloc->baseAddress()) << ", "
-                 << genInline(alloc->laneOffset()) << ", "
-                 << genInline(alloc->colOffset()) << ");\n";
+                 << genInline(alloc->address()) << ");\n";
         break;
       }
       default:
diff --git a/csrc/device_lower/pass/allocation.cpp b/csrc/device_lower/pass/allocation.cpp
index 7c2924a3366..0cf9097a971 100644
--- a/csrc/device_lower/pass/allocation.cpp
+++ b/csrc/device_lower/pass/allocation.cpp
@@ -483,9 +483,7 @@ class AllocationInserter : public kir::ExprMutator {
           GpuLower::current()->tmemInfo().allocation_address;
       auto address_ti = IrBuilder::create<kir::TensorIndex>(
           allocation_address, allocation_address->fusion()->zeroVal());
-      alloc_expr->setBaseAddress(address_ti);
-      alloc_expr->setLaneOffset(allocation_address->fusion()->zeroVal());
-      alloc_expr->setColOffset(allocation_address->fusion()->zeroVal());
+      alloc_expr->setAddress(address_ti);
     }
 
     return alloc_expr;
diff --git a/csrc/kernel_ir.cpp b/csrc/kernel_ir.cpp
index c5b7964e7df..d21e83f32bd 100644
--- a/csrc/kernel_ir.cpp
+++ b/csrc/kernel_ir.cpp
@@ -187,8 +187,6 @@ Allocate::Allocate(
   addAttribute(alias);
   // Always initialize smem/tmem addresses to nullptr
   addAttribute(nullptr);
-  addAttribute(nullptr);
-  addAttribute(nullptr);
 
   for (auto s : shape) {
     addAttribute(s);
diff --git a/csrc/kernel_ir.h b/csrc/kernel_ir.h
index 3f7027982e3..5dce505b2b6 100644
--- a/csrc/kernel_ir.h
+++ b/csrc/kernel_ir.h
@@ -309,12 +309,9 @@ class Allocate final : public Expr {
   //! Size of each dimension
   std::vector<Val*> shape() const {
-    constexpr int64_t num_attributes_before_shape = 8;
     std::vector<Val*> result;
-    result.reserve(attributes().size() - num_attributes_before_shape);
-    for (auto i = attributes().begin() + num_attributes_before_shape;
-         i != attributes().end();
-         ++i) {
+    result.reserve(attributes().size() - 6);
+    for (auto i = attributes().begin() + 6; i != attributes().end(); ++i) {
       result.emplace_back((*i)->as<Val>());
     }
     return result;
   }
@@ -368,8 +365,9 @@ class Allocate final : public Expr {
   // aligned address in bytes.
   void setAddress(Val* addr) {
     NVF_CHECK(
-        memoryType() == MemoryType::Shared,
-        "Allocation address may only be set for shared memory allocations. Memory type is ",
+        memoryType() == MemoryType::Shared ||
+            memoryType() == MemoryType::Tensor,
+        "Allocation address may only be set for shared/tensor memory allocations. Memory type is ",
         memoryType());
     NVF_CHECK(
         address() == nullptr,
         "Attempted to set address twice for allocation ",
         toString());
     attributes_[5] = addr;
   }
 
-  void setBaseAddress(Val* addr) {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Allocation base address may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    NVF_CHECK(
-        baseAddress() == nullptr,
-        "Attempted to set base address twice for allocation ",
-        toString());
-    attributes_[5] = addr;
-  }
-
-  void setLaneOffset(Val* lane_offset) {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Lane offset may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    NVF_CHECK(
-        laneOffset() == nullptr,
-        "Attempted to set lane offset twice for allocation ",
-        toString());
-    attributes_[6] = lane_offset;
-  }
-
-  void setColOffset(Val* col_offset) {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Column offset may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    NVF_CHECK(
-        colOffset() == nullptr,
-        "Attempted to set column offset twice for allocation ",
-        toString());
-    attributes_[7] = col_offset;
-  }
-
   // This is an integer scalar describing the byte address within the dynamic
   // shared memory array for a shared memory allocation. For memory types other
   // than Shared, or before allocation, this function might return nullptr.
   Val* address() const {
     NVF_CHECK(
-        memoryType() == MemoryType::Shared,
+        memoryType() == MemoryType::Shared ||
+            memoryType() == MemoryType::Tensor,
         "Allocation address may only be set for shared memory allocations. Memory type is ",
         memoryType());
     return attributeVal(5);
   }
-
-  Val* baseAddress() const {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Base address may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    return attributeVal(5);
-  }
-
-  Val* laneOffset() const {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Lane offset may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    return attributeVal(6);
-  }
-
-  Val* colOffset() const {
-    NVF_CHECK(
-        memoryType() == MemoryType::Tensor,
-        "Column offset may only be set for tensor memory allocations. Memory type is ",
-        memoryType());
-    return attributeVal(7);
-  }
 };
 
 // Allocate tensor memory tcgen05.alloc
diff --git a/runtime/tensor_memory.cu b/runtime/tensor_memory.cu
index 6e84244d12a..1c3fca0aae5 100644
--- a/runtime/tensor_memory.cu
+++ b/runtime/tensor_memory.cu
@@ -10,10 +10,8 @@
 // manipulate tensor memory addresses. Example usage:
 //   TMemTensor T0(0x12345678):
 //     -> address (lane=0x1234, col=0x5678):
-//   TMemTensor T1(0x12345678, 32, 32):
-//     -> address (lane=0x1234+32, col=0x5678+32)
-//   TMemTensor T2 = T1 + {64, 64}:
-//     -> address (lane=T1.lane+64, col=T1.col+64)
+//   TMemTensor T1 = T0 + {64, 64}:
+//     -> address (lane=T0.lane+64, col=T0.col+64)
 struct TMemTensor {
   uint32_t raw_address;
 
@@ -31,9 +29,6 @@ struct TMemTensor {
  public:
   TMemTensor(uint32_t raw_address) : raw_address(raw_address) {}
 
-  TMemTensor(uint32_t base_address, uint16_t lane_offset, uint16_t col_offset)
-      : raw_address(add(base_address, lane_offset, col_offset)) {}
-
   operator uint32_t() const {
     return raw_address;
   }
@@ -41,14 +36,6 @@ struct TMemTensor {
   uint32_t operator+(Array<uint16_t, 2, 2> offset) const {
     return add(raw_address, offset[0], offset[1]);
   }
-
-  uint16_t lane() const {
-    return raw_address >> 16;
-  }
-
-  uint16_t col() const {
-    return raw_address & 0xFFFF;
-  }
 };
 
 static_assert(