diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 1a4e416a8..2d69ec0e5 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -14,7 +14,6 @@ steps: dirs: - src - lib - - examples agents: queue: "juliaecosystem" os: "macos" @@ -84,7 +83,6 @@ steps: dirs: - src - lib - - examples env: MTL_DEBUG_LAYER: '1' MTL_SHADER_VALIDATION: '1' @@ -113,7 +111,6 @@ steps: dirs: - src - lib - - examples env: JULIA_LLVM_ARGS: '--opaque-pointers' agents: diff --git a/lib/mps/MPS.jl b/lib/mps/MPS.jl index 9097a3f77..dacc8817e 100644 --- a/lib/mps/MPS.jl +++ b/lib/mps/MPS.jl @@ -43,6 +43,7 @@ is_supported(dev::MTLDevice) = ccall(:MPSSupportsMTLDevice, Bool, (id{MTLDevice} include("libmps.jl") include("size.jl") +include("datatype.jl") # high-level wrappers include("command_buf.jl") diff --git a/lib/mps/datatype.jl b/lib/mps/datatype.jl new file mode 100644 index 000000000..df37928d9 --- /dev/null +++ b/lib/mps/datatype.jl @@ -0,0 +1,17 @@ +## Some extra definitions for MPSDataType defined in libmps.jl + +# Conversions for MPSDataTypes with Julia equivalents +const jl_mps_to_typ = Dict{MPSDataType, DataType}() +for type in [ + :Bool, :UInt8, :UInt16, :UInt32, :UInt64, :Int8, :Int16, :Int32, :Int64, + :Float16, :BFloat16, :Float32, (:ComplexF16, :MPSDataTypeComplexFloat16), + (:ComplexF32, :MPSDataTypeComplexFloat32), + ] + jltype, mpstype = if type isa Symbol + type, Symbol(:MPSDataType, type) + else + type + end + @eval Base.convert(::Type{MPSDataType}, ::Type{$jltype}) = $(mpstype) + @eval jl_mps_to_typ[$(mpstype)] = $jltype +end diff --git a/lib/mps/matrix.jl b/lib/mps/matrix.jl index f1b156cd2..5953377e5 100644 --- a/lib/mps/matrix.jl +++ b/lib/mps/matrix.jl @@ -1,28 +1,3 @@ -## Some extra definitions for MPSDataType defined in libmps.jl - -## bitwise operations lose type information, so allow conversions -Base.convert(::Type{MPSDataType}, x::Integer) = MPSDataType(x) - -# Conversions for MPSDataTypes with Julia equivalents -const jl_mps_to_typ = 
Dict{MPSDataType, DataType}() -for type in [ - :Bool, :UInt8, :UInt16, :UInt32, :UInt64, :Int8, :Int16, :Int32, :Int64, - :Float16, :BFloat16, :Float32, (:ComplexF16, :MPSDataTypeComplexFloat16), - (:ComplexF32, :MPSDataTypeComplexFloat32), - ] - jltype, mpstype = if type isa Symbol - type, Symbol(:MPSDataType, type) - else - type - end - @eval Base.convert(::Type{MPSDataType}, ::Type{$jltype}) = $(mpstype) - @eval jl_mps_to_typ[$(mpstype)] = $jltype -end -Base.sizeof(t::MPSDataType) = sizeof(jl_mps_to_typ[t]) - -Base.convert(::Type{DataType}, mpstyp::MPSDataType) = jl_mps_to_typ[mpstyp] - - ## descriptor export MPSMatrixDescriptor @@ -119,6 +94,13 @@ function MPSMatrix(arr::MtlArray{T,3}) where T return MPSMatrix(arr, desc, offset) end +function Base.size(mat::MPS.MPSMatrix) + if mat.matrices > 1 + return Int.((mat.matrices, mat.rows, mat.columns)) + else + return Int.((mat.rows, mat.columns)) + end +end ## matrix multiplication @@ -160,7 +142,7 @@ with any `MtlArray` and it should be accelerated using Metal Performance Shaders """ function matmul!(c::MtlArray{T1,N}, a::MtlArray{T2,N}, b::MtlArray{T3,N}, alpha::Number=true, beta::Number=true, - transpose_a=false, transpose_b=false) where {T1, T2, T3, N} + transpose_a=false, transpose_b=false) where {T1, T2, T3, N} # NOTE: MPS uses row major, while Julia is col-major. 
Instead of transposing # the inputs (by passing !transpose_[ab]) and afterwards transposing # the output, we use the property that (AB)ᵀ = BᵀAᵀ diff --git a/lib/mps/ndarray.jl b/lib/mps/ndarray.jl index 34d1a69f9..6fe2418a0 100644 --- a/lib/mps/ndarray.jl +++ b/lib/mps/ndarray.jl @@ -114,7 +114,7 @@ function MPSNDArray(arr::MtlArray{T,N}) where {T,N} arrsize = size(arr) @assert arrsize[1] * sizeof(T) % 16 == 0 "First dimension of input MtlArray must have a byte size divisible by 16" desc = MPSNDArrayDescriptor(T, arrsize) - return MPSNDArray(arr.data[], UInt(arr.offset), desc) + return MPSNDArray(arr.data[], UInt(arr.offset) * sizeof(T), desc) end function Metal.MtlArray(ndarr::MPSNDArray; storage = Metal.DefaultStorageMode, async = false) diff --git a/src/MetalKernels.jl b/src/MetalKernels.jl index cdde3d0f2..7fa17e77f 100644 --- a/src/MetalKernels.jl +++ b/src/MetalKernels.jl @@ -131,6 +131,7 @@ end ## indexing +## COV_EXCL_START @device_override @inline function KA.__index_Local_Linear(ctx) return thread_position_in_threadgroup_1d() end @@ -191,5 +192,6 @@ end @device_override @inline function KA.__print(args...) 
# TODO end +## COV_EXCL_STOP end diff --git a/src/accumulate.jl b/src/accumulate.jl index 1cf4c1b51..31e2dc4fe 100644 --- a/src/accumulate.jl +++ b/src/accumulate.jl @@ -1,3 +1,4 @@ +## COV_EXCL_START function partial_scan(op::Function, output::AbstractArray{T}, input::AbstractArray, Rdim, Rpre, Rpost, Rother, neutral, init, ::Val{maxthreads}, ::Val{inclusive}=Val(true)) where {T, maxthreads, inclusive} @@ -100,6 +101,7 @@ function aggregate_partial_scan(op::Function, output::AbstractArray, aggregates: return end +## COV_EXCL_STOP function scan!(f::Function, output::WrappedMtlArray{T}, input::WrappedMtlArray; dims::Integer, init=nothing, neutral=GPUArrays.neutral_element(f, T)) where {T} diff --git a/src/broadcast.jl b/src/broadcast.jl index 9ece5fc1b..05ea7c196 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -59,6 +59,7 @@ end _broadcast_shapes[Is] += 1 end if _broadcast_shapes[Is] > BROADCAST_SPECIALIZATION_THRESHOLD + ## COV_EXCL_START function broadcast_cartesian_static(dest, bc, Is) i = thread_position_in_grid_1d() stride = threads_per_grid_1d() @@ -69,6 +70,7 @@ end end return end + ## COV_EXCL_STOP Is = StaticCartesianIndices(Is) kernel = @metal launch=false broadcast_cartesian_static(dest, bc, Is) @@ -82,6 +84,7 @@ end # try to use the most appropriate hardware index to avoid integer division if ndims(dest) == 1 || (isa(IndexStyle(dest), IndexLinear) && isa(IndexStyle(bc), IndexLinear)) + ## COV_EXCL_START function broadcast_linear(dest, bc) i = thread_position_in_grid_1d() stride = threads_per_grid_1d() @@ -91,12 +94,14 @@ end end return end + ## COV_EXCL_STOP kernel = @metal launch=false broadcast_linear(dest, bc) elements = cld(length(dest), 4) threads = min(elements, kernel.pipeline.maxTotalThreadsPerThreadgroup) groups = cld(elements, threads) elseif ndims(dest) == 2 + ## COV_EXCL_START function broadcast_2d(dest, bc) is = Tuple(thread_position_in_grid_2d()) stride = threads_per_grid_2d() @@ -107,6 +112,7 @@ end end return end + ## 
COV_EXCL_STOP kernel = @metal launch=false broadcast_2d(dest, bc) w = min(size(dest, 1), kernel.pipeline.threadExecutionWidth) @@ -114,6 +120,7 @@ end threads = (w, h) groups = cld.(size(dest), threads) elseif ndims(dest) == 3 + ## COV_EXCL_START function broadcast_3d(dest, bc) is = Tuple(thread_position_in_grid_3d()) stride = threads_per_grid_3d() @@ -126,6 +133,7 @@ end end return end + ## COV_EXCL_STOP kernel = @metal launch=false broadcast_3d(dest, bc) w = min(size(dest, 1), kernel.pipeline.threadExecutionWidth) @@ -135,6 +143,7 @@ end threads = (w, h, d) groups = cld.(size(dest), threads) else + ## COV_EXCL_START function broadcast_cartesian(dest, bc) i = thread_position_in_grid_1d() stride = threads_per_grid_1d() @@ -145,6 +154,7 @@ end end return end + ## COV_EXCL_STOP kernel = @metal launch=false broadcast_cartesian(dest, bc) elements = cld(length(dest), 4) diff --git a/src/utilities.jl b/src/utilities.jl index a5285a337..a6ee9c042 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -146,7 +146,7 @@ end ## profile macro - +## COV_EXCL_START function profile_dir() root = pwd() i = 1 @@ -239,3 +239,5 @@ macro profile(ex...) 
end end end +## COV_EXCL_STOP + diff --git a/test/mps/matrix.jl b/test/mps/matrix.jl index 75e4c2fc4..da61417c6 100644 --- a/test/mps/matrix.jl +++ b/test/mps/matrix.jl @@ -45,77 +45,83 @@ using .MPS: MPSMatrix rowBytes = sizeof(T) * cols mats = 4 - desc = MPSMatrixDescriptor(rows, cols, rowBytes, T) - devmat = MPSMatrix(dev, desc) - @test devmat isa MPSMatrix - @test devmat.device == dev - @test devmat.rows == rows - @test devmat.columns == cols - @test devmat.rowBytes == rowBytes - @test devmat.matrices == 1 - @test devmat.dataType == DT - @test devmat.matrixBytes == rowBytes * rows - @test devmat.offset == 0 - - mat = MtlMatrix{T}(undef, rows, cols) - acols, arows = size(mat) - arowBytes = sizeof(T) * acols - abufmat = MPSMatrix(mat) - @test abufmat isa MPSMatrix - @test abufmat.device == dev - @test abufmat.rows == arows - @test abufmat.columns == acols - @test abufmat.rowBytes == arowBytes - @test abufmat.matrices == 1 - @test abufmat.dataType == DT - @test abufmat.matrixBytes == arowBytes * arows - @test abufmat.offset == 0 - @test abufmat.data == mat.data[] - - vmat = @view mat[:, 2:3] - vcols, vrows = size(vmat) - vrowBytes = sizeof(T) * vcols - vbufmat = MPSMatrix(vmat) - @test vbufmat isa MPSMatrix - @test vbufmat.device == dev - @test vbufmat.rows == vrows - @test vbufmat.columns == vcols - @test vbufmat.rowBytes == vrowBytes - @test vbufmat.matrices == 1 - @test vbufmat.dataType == DT - @test vbufmat.matrixBytes == vrowBytes * vrows - @test vbufmat.offset == vmat.offset * sizeof(T) - @test vbufmat.data == vmat.data[] - - arr = MtlArray{T,3}(undef, rows, cols, mats) - mcols, mrows, mmats = size(arr) - mrowBytes = sizeof(T) * mcols - mpsmat = MPSMatrix(mat) - @test mpsmat isa MPSMatrix - @test mpsmat.device == dev - @test mpsmat.rows == mrows - @test mpsmat.columns == mcols - @test mpsmat.rowBytes == mrowBytes - @test mpsmat.matrices == 1 - @test mpsmat.dataType == DT - @test mpsmat.matrixBytes == mrowBytes * mrows - @test mpsmat.offset == 0 - @test 
mpsmat.data == mat.data[] - - vec = MtlVector{T}(undef, rows) - veccols, vecrows = length(vec), 1 - vecrowBytes = sizeof(T)*veccols - vmpsmat = MPSMatrix(vec) - @test vmpsmat isa MPSMatrix - @test vmpsmat.device == dev - @test vmpsmat.rows == vecrows - @test vmpsmat.columns == veccols - @test vmpsmat.rowBytes == vecrowBytes - @test vmpsmat.matrices == 1 - @test vmpsmat.dataType == DT - @test vmpsmat.matrixBytes == vecrowBytes*vecrows - @test vmpsmat.offset == 0 - @test vmpsmat.data == vec.data[] + let desc = MPSMatrixDescriptor(rows, cols, rowBytes, T) + devmat = MPSMatrix(dev, desc) + @test devmat isa MPSMatrix + @test devmat.device == dev + @test devmat.rows == rows + @test devmat.columns == cols + @test devmat.rowBytes == rowBytes + @test devmat.matrices == 1 + @test devmat.dataType == DT + @test devmat.matrixBytes == rowBytes * rows + @test devmat.offset == 0 + @test size(devmat) == (rows, cols) + end + + let mat = MtlMatrix{T}(undef, rows, cols) + acols, arows = size(mat) + arowBytes = sizeof(T) * acols + abufmat = MPSMatrix(mat) + @test abufmat isa MPSMatrix + @test abufmat.device == dev + @test abufmat.rows == arows + @test abufmat.columns == acols + @test abufmat.rowBytes == arowBytes + @test abufmat.matrices == 1 + @test abufmat.dataType == DT + @test abufmat.matrixBytes == arowBytes * arows + @test abufmat.offset == 0 + @test abufmat.data == mat.data[] + + vmat = @view mat[:, 2:3] + vcols, vrows = size(vmat) + vrowBytes = sizeof(T) * vcols + vbufmat = MPSMatrix(vmat) + @test vbufmat isa MPSMatrix + @test vbufmat.device == dev + @test vbufmat.rows == vrows + @test vbufmat.columns == vcols + @test vbufmat.rowBytes == vrowBytes + @test vbufmat.matrices == 1 + @test vbufmat.dataType == DT + @test vbufmat.matrixBytes == vrowBytes * vrows + @test vbufmat.offset == vmat.offset * sizeof(T) + @test vbufmat.data == vmat.data[] + end + + let arr = MtlArray{T, 3}(undef, rows, cols, mats) + mcols, mrows, mmats = size(arr) + mrowBytes = sizeof(T) * mcols + mpsmat = 
MPSMatrix(arr) + @test mpsmat isa MPSMatrix + @test mpsmat.device == dev + @test mpsmat.rows == mrows + @test mpsmat.columns == mcols + @test mpsmat.rowBytes == mrowBytes + @test mpsmat.matrices == mmats + @test mpsmat.dataType == DT + @test mpsmat.matrixBytes == mrowBytes * mrows + @test mpsmat.offset == 0 + @test mpsmat.data == arr.data[] + @test size(mpsmat) == (mmats, mrows, mcols) + end + + let vec = MtlVector{T}(undef, rows) + veccols, vecrows = length(vec), 1 + vecrowBytes = sizeof(T) * veccols + vmpsmat = MPSMatrix(vec) + @test vmpsmat isa MPSMatrix + @test vmpsmat.device == dev + @test vmpsmat.rows == vecrows + @test vmpsmat.columns == veccols + @test vmpsmat.rowBytes == vecrowBytes + @test vmpsmat.matrices == 1 + @test vmpsmat.dataType == DT + @test vmpsmat.matrixBytes == vecrowBytes * vecrows + @test vmpsmat.offset == 0 + @test vmpsmat.data == vec.data[] + end end diff --git a/test/mps/size.jl b/test/mps/size.jl index 12687dce9..0bd48c7db 100644 --- a/test/mps/size.jl +++ b/test/mps/size.jl @@ -1,13 +1,18 @@ # ## size @testset "size" begin + siz1 = MPS.MPSSize() + @test siz1.width == 1.0 + @test siz1.height == 1.0 + @test siz1.depth == 1.0 + dim1 = rand() dim2 = rand() dim3 = rand() @test MPS.MPSSize(dim1) == MPS.MPSSize((dim1,)) - @test MPS.MPSSize(dim1,dim2) == MPS.MPSSize((dim1,dim2)) - @test MPS.MPSSize(dim1,dim2,dim3) == MPS.MPSSize((dim1,dim2,dim3)) + @test MPS.MPSSize(dim1, dim2) == MPS.MPSSize((dim1, dim2)) + @test MPS.MPSSize(dim1, dim2, dim3) == MPS.MPSSize((dim1, dim2, dim3)) end @testset "origin" begin @@ -15,10 +20,25 @@ end dim2 = rand() dim3 = rand() - orig = MPS.MPSOrigin(dim1,dim2,dim3) - @test orig.x == dim1 - @test orig.y == dim2 - @test orig.z == dim3 + orig1 = MPS.MPSOrigin(dim1, dim2, dim3) + @test orig1.x == dim1 + @test orig1.y == dim2 + @test orig1.z == dim3 + + orig2 = MPS.MPSOrigin(dim1, dim2) + @test orig2.x == dim1 + @test orig2.y == dim2 + @test orig2.z == 0.0 + + orig3 = MPS.MPSOrigin(dim1) + @test orig3.x == dim1 + @test 
orig3.y == 0.0 + @test orig3.z == 0.0 + + orig4 = MPS.MPSOrigin() + @test orig4.x == 0.0 + @test orig4.y == 0.0 + @test orig4.z == 0.0 end @testset "offset" begin @@ -26,8 +46,33 @@ end dim2 = rand(Int) dim3 = rand(Int) - off = MPS.MPSOffset(dim1,dim2,dim3) - @test off.x == dim1 - @test off.y == dim2 - @test off.z == dim3 + off1 = MPS.MPSOffset(dim1, dim2, dim3) + @test off1.x == dim1 + @test off1.y == dim2 + @test off1.z == dim3 + + off2 = MPS.MPSOffset(dim1, dim2) + @test off2.x == dim1 + @test off2.y == dim2 + @test off2.z == 0 + + off3 = MPS.MPSOffset(dim1) + @test off3.x == dim1 + @test off3.y == 0 + @test off3.z == 0 + + off4 = MPS.MPSOffset() + @test off4.x == 0 + @test off4.y == 0 + @test off4.z == 0 +end + +@testset "region" begin + reg1 = MPS.MPSRegion() + @test reg1.origin isa MPS.MPSOrigin + @test reg1.size isa MPS.MPSSize + + reg2 = MPS.MPSRegion(MPS.MPSOrigin()) + @test reg2.origin isa MPS.MPSOrigin + @test reg2.size isa MPS.MPSSize end diff --git a/test/mtl/metal.jl b/test/mtl/metal.jl index cfb93a45d..278ce34e7 100644 --- a/test/mtl/metal.jl +++ b/test/mtl/metal.jl @@ -9,7 +9,7 @@ using .MTL devs = devices() @test length(devs) > 0 -dev = first(devs) +dev = MTLDevice(1) @test dev == devs[1] if length(devs) > 1 @@ -34,6 +34,35 @@ full_str = sprint(io->show(io, MIME"text/plain"(), dev)) @test dev.currentAllocatedSize isa Integer +@test is_m1(dev) isa Bool +@test is_m2(dev) isa Bool +@test is_m3(dev) isa Bool +@test is_m4(dev) isa Bool + +@test MTL.MTLCreateSystemDefaultDevice() isa MTLDevice + +end + +@testset "storage_type" begin + @test convert(MTL.MTLStorageMode, MTL.SharedStorage) == MTL.MTLStorageModeShared + @test convert(MTL.MTLStorageMode, MTL.ManagedStorage) == MTL.MTLStorageModeManaged + @test convert(MTL.MTLStorageMode, MTL.PrivateStorage) == MTL.MTLStorageModePrivate + @test convert(MTL.MTLStorageMode, MTL.Memoryless) == MTL.MTLStorageModeMemoryless + + @test convert(MTL.MTLResourceOptions, MTL.SharedStorage) == 
MTL.MTLResourceStorageModeShared + @test convert(MTL.MTLResourceOptions, MTL.ManagedStorage) == MTL.MTLResourceStorageModeManaged + @test convert(MTL.MTLResourceOptions, MTL.PrivateStorage) == MTL.MTLResourceStorageModePrivate + @test convert(MTL.MTLResourceOptions, MTL.Memoryless) == MTL.MTLResourceStorageModeMemoryless + + @test convert(MTL.MTLResourceOptions, MTL.MTLStorageModeShared) == MTL.MTLResourceStorageModeShared + @test convert(MTL.MTLResourceOptions, MTL.MTLStorageModeManaged) == MTL.MTLResourceStorageModeManaged + @test convert(MTL.MTLResourceOptions, MTL.MTLStorageModePrivate) == MTL.MTLResourceStorageModePrivate + @test convert(MTL.MTLResourceOptions, MTL.MTLStorageModeMemoryless) == MTL.MTLResourceStorageModeMemoryless + + @test MTL.MTLResourceStorageModeShared == MTL.MTLStorageModeShared + @test MTL.MTLStorageModeManaged == MTL.MTLResourceStorageModeManaged + @test MTL.MTLResourceStorageModePrivate == MTL.MTLStorageModePrivate + @test MTL.MTLStorageModeMemoryless == MTL.MTLResourceStorageModeMemoryless end @testset "compile options" begin diff --git a/test/mtl/size.jl b/test/mtl/size.jl index f75e8cce6..b43fc35c2 100644 --- a/test/mtl/size.jl +++ b/test/mtl/size.jl @@ -1,12 +1,12 @@ -@testset "size.jl" begin + @testset "size" begin dim1 = rand(UInt64) dim2 = rand(UInt64) dim3 = rand(UInt64) @test MTL.MTLSize(dim1) == MTL.MTLSize((dim1,)) - @test MTL.MTLSize(dim1,dim2) == MTL.MTLSize((dim1,dim2)) - @test MTL.MTLSize(dim1,dim2,dim3) == MTL.MTLSize((dim1,dim2,dim3)) + @test MTL.MTLSize(dim1, dim2) == MTL.MTLSize((dim1, dim2)) + @test MTL.MTLSize(dim1, dim2, dim3) == MTL.MTLSize((dim1, dim2, dim3)) end @testset "origin" begin @@ -14,15 +14,28 @@ end dim2 = rand(UInt64) dim3 = rand(UInt64) - orig = MTL.MTLOrigin(dim1,dim2,dim3) - @test orig.x == dim1 - @test orig.y == dim2 - @test orig.z == dim3 + orig1 = MTL.MTLOrigin(dim1, dim2, dim3) + @test orig1.x == dim1 + @test orig1.y == dim2 + @test orig1.z == dim3 + + orig2 = MTL.MTLOrigin(dim1, dim2) + @test 
orig2.x == dim1 + @test orig2.y == dim2 + @test orig2.z == 0 + + orig3 = MTL.MTLOrigin(dim1) + @test orig3.x == dim1 + @test orig3.y == 0 + @test orig3.z == 0 end @testset "region" begin - reg = MTL.MTLRegion() - @test reg.origin isa MTL.MTLOrigin - @test reg.size isa MTL.MTLSize -end + reg1 = MTL.MTLRegion() + @test reg1.origin isa MTL.MTLOrigin + @test reg1.size isa MTL.MTLSize + + reg2 = MTL.MTLRegion(MTL.MTLOrigin()) + @test reg2.origin isa MTL.MTLOrigin + @test reg2.size isa MTL.MTLSize end