Skip to content

Commit 42d312f

Browse files
authored
Merge pull request #12 from trusted-programming/mem_alloc
feat: implement vadd in npu functions
2 parents 53fcad8 + da42644 commit 42d312f

28 files changed

+655
-343
lines changed

descend_derive/build.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::path::Path;
44
fn main() {
55
// Tell Cargo to re-run this build script if any .desc files change
66
let examples_dir = "examples/core";
7-
7+
88
if Path::new(examples_dir).exists() {
99
// Walk through the directory and tell Cargo to re-run if any .desc files change
1010
if let Ok(entries) = fs::read_dir(examples_dir) {
@@ -17,7 +17,7 @@ fn main() {
1717
}
1818
}
1919
}
20-
20+
2121
// Also watch the entire directory for new files
2222
println!("cargo:rerun-if-changed={}", examples_dir);
2323
}

examples/core/assign.desc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fn assign<n: nat, r: prv>(
2+
a: &r shrd gpu.global [i16; 16],
3+
b: &r uniq gpu.global [i16; 16]
4+
) -[grid: gpu.grid<X<1>, X<16>>]-> () {
5+
b = a;
6+
()
7+
}
File renamed without changes.

examples/core/vdiv.desc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fn div<n: nat, r: prv>(
2+
a: &r shrd gpu.global [i16; 16],
3+
b: &r shrd gpu.global [i16; 16]
4+
) -[grid: gpu.grid<X<1>, X<16>>]-> () {
5+
a / b;
6+
()
7+
}

examples/core/vec_add.desc

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Vector addition kernel demonstrating Descend's safe GPU programming model
2+
// This function showcases extended borrow checking, memory safety, and execution context tracking
3+
4+
// Generic function with type parameters:
5+
// - n: nat - Natural number parameter (for array size, though not used in this specific function)
6+
// - r: prv - Provenance parameter tracking memory region/lifetime for all references
7+
fn add<n: nat, r: prv>(
8+
// Shared reference to first input vector - multiple threads can read simultaneously
9+
// Memory space: gpu.global (GPU global memory)
10+
// Ownership: shrd (shared) - prevents write-after-read data races
11+
// Type: 16-element array of 16-bit signed integers
12+
a: &r shrd gpu.global [i16; 16],
13+
14+
// Shared reference to second input vector - multiple threads can read simultaneously
15+
// Same memory space and ownership constraints as 'a'
16+
b: &r shrd gpu.global [i16; 16],
17+
18+
// Unique reference to output vector - only one thread can write at a time
19+
// Ownership: uniq (unique) - prevents write-after-write data races
20+
// The compiler statically ensures no conflicting borrows exist
21+
c: &r uniq gpu.global [i16; 16]
22+
23+
// Execution context specification - defines how this function runs on GPU hardware
24+
// - grid: gpu.grid<X<1>, X<16>> - GPU execution grid with 1 block containing 16 threads
25+
// - The type system ensures GPU memory is only accessed in GPU execution contexts
26+
// - Prevents invalid cross-device memory accesses (CPU accessing GPU memory)
27+
) -[grid: gpu.grid<X<1>, X<16>>]-> () {
28+
29+
// Vector addition operation - element-wise addition of arrays
30+
// The compiler generates safe parallel code that:
31+
// 1. Loads data from global memory to local memory for each thread
32+
// 2. Performs vectorized addition using HIVM dialect operations
33+
// 3. Stores results back to global memory safely
34+
// The ownership system ensures this operation is race-free
35+
//
36+
// LAZY LOADING: Descend's compiler implements lazy loading strategies:
37+
// - Memory loads are deferred until actually needed by computation
38+
// - The compiler emits HIVM dialect 'hivm.hir.load' operations that load from
39+
// global memory (gm) to local memory (ub) only when data is accessed
40+
// - This minimizes memory bandwidth usage and improves cache efficiency
41+
// - The type system ensures loads happen in the correct execution context
42+
// - Shared references enable read-only access without unnecessary copies
43+
c = a + b;
44+
45+
// Unit return value - the kernel returns no data; its result is the write to 'c'
46+
// In MLIR, this becomes a 'return' operation
47+
()
48+
}

examples/core/vec_add.desc.off

Lines changed: 0 additions & 8 deletions
This file was deleted.

examples/core/vec_add_1.desc.off

Lines changed: 0 additions & 8 deletions
This file was deleted.
File renamed without changes.

examples/core/vmul.desc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fn mul<n: nat, r: prv>(
2+
a: &r shrd gpu.global [i16; 16],
3+
b: &r shrd gpu.global [i16; 16]
4+
) -[grid: gpu.grid<X<1>, X<16>>]-> () {
5+
a * b;
6+
()
7+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fn assign_to_shared_ref<n: nat, r: prv>(
2+
a: &r shrd gpu.global [i16; 16],
3+
b: &r shrd gpu.global [i16; 16]
4+
) -[grid: gpu.grid<X<1>, X<16>>]-> () {
5+
b = a; // This should fail - cannot assign to shared reference
6+
()
7+
}

0 commit comments

Comments
 (0)