Skip to content

Commit c4dbb06

Browse files
steven-johnsonderek-gerstmannDerek Gerstmann
authored
Backport #8259 to release/17.x (#8270)
* [vulkan] Fix Vulkan SIMT mappings for GPU loop vars. (#8259) * Fix Vulkan SIMT mappings for GPU loop vars. Previous refactoring accidentally used the fully qualified var name rather than the categorized vulkan intrinsic name. * Avoid formatting the GPU kernel to a string for Vulkan (since it's binary SPIR-V needs to remain intact). --------- Co-authored-by: Derek Gerstmann <dgerstmann@adobe.com> Co-authored-by: Steven Johnson <srj@google.com> * Update CodeGen_Vulkan_Dev.cpp * [vulkan] Add conform API methods to memory allocator to fix block allocations (#8130) * Add conform API methods to block and region allocator classes Override conform requests for Vulkan memory allocator Cleanup memory requirement constraints for Vulkan Add conform test cases to block_allocator runtime test. * Clang format/tidy pas * Fix unsigned int comparisons * Clang format pass * Fix other unsigned int comparisons * Fix mismatched template types for max() * Fix whitespace for clang format --------- Co-authored-by: Derek Gerstmann <dgerstmann@adobe.com> * Backport fixes for Vulkan in src/runtime/internal for allocations. --------- Co-authored-by: Derek Gerstmann <derek.gerstmann@gmail.com> Co-authored-by: Derek Gerstmann <dgerstmann@adobe.com>
1 parent 54a7f1d commit c4dbb06

File tree

8 files changed

+945
-296
lines changed

8 files changed

+945
-296
lines changed

src/CodeGen_C.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,8 +1159,9 @@ void CodeGen_C::compile(const Buffer<> &buffer) {
11591159
bool is_constant = buffer.dimensions() != 0;
11601160

11611161
// If it is an GPU source kernel, we would like to see the actual output, not the
1162-
// uint8 representation. We use a string literal for this.
1163-
if (ends_with(name, "gpu_source_kernels")) {
1162+
// uint8 representation. We use a string literal for this. Since the Vulkan backend
1163+
// actually generates a SPIR-V binary, keep it as raw data to avoid textual reformatting.
1164+
if (ends_with(name, "gpu_source_kernels") && !target.has_feature(Target::Vulkan)) {
11641165
stream << "static const char *" << name << "_string = R\"BUFCHARSOURCE(";
11651166
stream.write((char *)b.host, num_elems);
11661167
stream << ")BUFCHARSOURCE\";\n";

src/CodeGen_Vulkan_Dev.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2514,12 +2514,20 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_workgroup_size(SpvId kernel_func
25142514
namespace {
25152515

25162516
// Locate all the unique GPU variables used as SIMT intrinsics
2517+
// This pass is used to identify if LocalInvocationID and/or WorkgroupID
2518+
// need to be declared as variables for the entrypoint to the Kernel. Since
2519+
// these can only be declared once and their type is vec3, we don't
2520+
// care about the specific dims that are mapped to loop variables.
25172521
class FindIntrinsicsUsed : public IRVisitor {
25182522
using IRVisitor::visit;
25192523
void visit(const For *op) override {
25202524
if (CodeGen_GPU_Dev::is_gpu_var(op->name)) {
2525+
2526+
// map the block or thread id name to the SIMT intrinsic definition
25212527
auto intrinsic = simt_intrinsic(op->name);
2522-
intrinsics_used.insert(intrinsic.first);
2528+
2529+
// mark the name of the intrinsic being used (without the dimension)
2530+
intrinsics_used.insert(intrinsic.first); // name only!
25232531
}
25242532
op->body.accept(this);
25252533
}
@@ -2555,20 +2563,22 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_entry_point(const Stmt &s, SpvId
25552563
s.accept(&find_intrinsics);
25562564

25572565
SpvFactory::Variables entry_point_variables;
2558-
for (const std::string &intrinsic_name : find_intrinsics.intrinsics_used) {
2566+
for (const std::string &used_intrinsic : find_intrinsics.intrinsics_used) {
25592567

2560-
// The builtins are pointers to vec3
2568+
// The builtins are pointers to vec3 and can only be declared once per kernel entrypoint
25612569
SpvStorageClass storage_class = SpvStorageClassInput;
25622570
SpvId intrinsic_type_id = builder.declare_type(Type(Type::UInt, 32, 3));
25632571
SpvId intrinsic_ptr_type_id = builder.declare_pointer_type(intrinsic_type_id, storage_class);
2564-
const std::string intrinsic_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + intrinsic_name;
2572+
const std::string intrinsic_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + used_intrinsic;
25652573
SpvId intrinsic_var_id = builder.declare_global_variable(intrinsic_var_name, intrinsic_ptr_type_id, storage_class);
25662574
SpvId intrinsic_loaded_id = builder.reserve_id();
25672575
builder.append(SpvFactory::load(intrinsic_type_id, intrinsic_loaded_id, intrinsic_var_id));
25682576
symbol_table.push(intrinsic_var_name, {intrinsic_loaded_id, storage_class});
25692577

2570-
// Annotate that this is the specific builtin
2571-
SpvBuiltIn built_in_kind = map_simt_builtin(intrinsic_name);
2578+
// Map the used intrinsic name to the specific builtin
2579+
SpvBuiltIn built_in_kind = map_simt_builtin(used_intrinsic);
2580+
2581+
// Add an annotation that indicates this variable is bound to the requested intrinsic
25722582
SpvBuilder::Literals annotation_literals = {(uint32_t)built_in_kind};
25732583
builder.add_annotation(intrinsic_var_id, SpvDecorationBuiltIn, annotation_literals);
25742584

src/runtime/internal/block_allocator.h

Lines changed: 122 additions & 88 deletions
Large diffs are not rendered by default.

src/runtime/internal/memory_arena.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ void *MemoryArena::create_entry(void *user_context, Block *block, uint32_t index
271271
void *entry_ptr = lookup_entry(user_context, block, index);
272272
block->free_index = block->indices[index];
273273
block->status[index] = AllocationStatus::InUse;
274-
#if DEBUG_RUNTIME_INTERNAL
274+
#ifdef DEBUG_RUNTIME_INTERNAL
275275
memset(entry_ptr, 0, config.entry_size);
276276
#endif
277277
return entry_ptr;

src/runtime/internal/memory_resources.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ ALWAYS_INLINE bool is_power_of_two_alignment(size_t x) {
127127
// -- Alignment must be power of two!
128128
ALWAYS_INLINE size_t aligned_offset(size_t offset, size_t alignment) {
129129
halide_abort_if_false(nullptr, is_power_of_two_alignment(alignment));
130-
return (offset + (alignment - 1)) & ~(alignment - 1);
130+
return (alignment == 0) ? (offset) : (offset + (alignment - 1)) & ~(alignment - 1);
131131
}
132132

133133
// Returns a suitable alignment such that requested alignment is a suitable
@@ -202,18 +202,22 @@ struct HalideSystemAllocatorFns {
202202

203203
typedef int (*AllocateBlockFn)(void *, MemoryBlock *);
204204
typedef int (*DeallocateBlockFn)(void *, MemoryBlock *);
205+
typedef int (*ConformBlockRequestFn)(void *, MemoryRequest *);
205206

206207
struct MemoryBlockAllocatorFns {
207208
AllocateBlockFn allocate = nullptr;
208209
DeallocateBlockFn deallocate = nullptr;
210+
ConformBlockRequestFn conform = nullptr;
209211
};
210212

211213
typedef int (*AllocateRegionFn)(void *, MemoryRegion *);
212214
typedef int (*DeallocateRegionFn)(void *, MemoryRegion *);
215+
typedef int (*ConformBlockRegionFn)(void *, MemoryRequest *);
213216

214217
struct MemoryRegionAllocatorFns {
215218
AllocateRegionFn allocate = nullptr;
216219
DeallocateRegionFn deallocate = nullptr;
220+
ConformBlockRegionFn conform = nullptr;
217221
};
218222

219223
// --

0 commit comments

Comments
 (0)