Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions perf/arraydiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,22 @@ function neural(
)
end

"""
    profile_gpu(; T = Float32, h = 4096, d = 13, n = 178)

Run `MOI.eval_objective_gradient` on CUDA arrays once un-profiled and then once
more under `CUDA.@profile`, returning whatever `CUDA.@profile` returns.

# Keywords
- `T`: element type of the device vectors (`x` and `g`).
- `h`, `d`, `n`: forwarded to `_build`; the gradient buffer has length `h * d`.
"""
function profile_gpu(; T = Float32, h = 4096, d = 13, n = 178)
    state = _build(T, h, d, n, true)
    evaluator = state.evaluator
    x = CUDA.CuVector{T}(vec(state.W1))
    g = CUDA.zeros(T, h * d)
    # Single definition of the call that is executed twice below.
    evalgrad() = MOI.eval_objective_gradient(evaluator, g, x)
    # NOTE(review): overwriting `last_x` with NaN presumably forces the backend
    # to treat `x` as a new point instead of reusing cached work -- confirm.
    fill!(evaluator.backend.last_x, NaN)
    # Un-profiled first run; presumably a warm-up so compilation stays out of
    # the profile -- confirm.
    CUDA.@sync CUDA.@allowscalar evalgrad()
    fill!(evaluator.backend.last_x, NaN)
    return CUDA.@profile CUDA.@sync CUDA.@allowscalar evalgrad()
end

end # module
26 changes: 26 additions & 0 deletions perf/hand_cuda.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,30 @@ function neural(
end
end

"""
    profile_gpu(; T = Float32, h = 4096, d = 13, n = 178, prealloc = true)

Run the hand-written CUDA gradient once un-profiled and then once more under
`CUDA.@profile`, returning whatever `CUDA.@profile` returns.

The inputs are drawn with `randn` after `Random.seed!(0)` and copied to the
device with `CuArray`.

# Keywords
- `T`: element type of the random inputs.
- `h`, `d`, `n`: sizes of the inputs; `W1` is `h x d`, `W2` is `OUT_DIM x h`,
  `X` is `d x n` and `y` is `OUT_DIM x n`.
- `prealloc`: when `true`, call `gradient!` with a `Buffers` object constructed
  inside each run; otherwise call `gradient_alloc`.
"""
function profile_gpu(;
    T = Float32,
    h = 4096,
    d = 13,
    n = 178,
    prealloc::Bool = true,
)
    Random.seed!(0)
    # Draw order (W1, W2, X, y) is kept so the seeded inputs are unchanged.
    W1g = CuArray(randn(T, h, d))
    W2g = CuArray(randn(T, OUT_DIM, h))
    Xg = CuArray(randn(T, d, n))
    yg = CuArray(randn(T, OUT_DIM, n))
    CUDA.synchronize()
    # One gradient evaluation; `Buffers` is built inside so that both the
    # un-profiled and the profiled run construct their own buffers.
    function run_gradient()
        if prealloc
            return gradient!(Buffers{typeof(W1g)}(h, d, n), W1g, W2g, Xg, yg)
        end
        return gradient_alloc(W1g, W2g, Xg, yg)
    end
    # Un-profiled first run; presumably a warm-up so compilation stays out of
    # the profile -- confirm.
    CUDA.@sync run_gradient()
    return CUDA.@profile CUDA.@sync run_gradient()
end

end # module
Loading