@@ -154,6 +154,15 @@ function tiled_blasmul!(tile_size, α, A::AbstractMatrix{T}, B::AbstractMatrix{S
154154 C
155155end
156156
# Accumulate a single entry of the matrix product into `C`:
#
#     C[k, j] = α * Σ_ν A[k, ν] * B[ν, j] + C[k, j]
#
# where ν runs over `rowsupport(A, k) ∩ colsupport(B, j)` only.
#
# `β` is part of the signature but is never read in this kernel.
# NOTE(review): presumably the caller has already applied `β` to `C` before
# invoking this — confirm at the call site.
#
# Evaluates to the value that was stored into `C[k, j]`.
@inline function _default_blasmul_loop!(α, A, B, β, C, k, j)
    # A zero built from a representative "sum of products" expression; it is
    # used only to pick an accumulator type wide enough for the dot product.
    proto = @inbounds zero(A[k, 1] * B[1, j] + A[k, 1] * B[1, j])
    acc = convert(promote_type(eltype(C), typeof(proto)), proto)

    # Only indices in both the row support of A and the column support of B
    # are visited.
    overlap = rowsupport(A, k) ∩ colsupport(B, j)
    @simd for ν in overlap
        acc = @inbounds muladd(A[k, ν], B[ν, j], acc)
    end

    updated = @inbounds muladd(α, acc, C[k, j])
    @inbounds C[k, j] = updated
    return updated
end
165+
157166function default_blasmul! (α, A:: AbstractMatrix , B:: AbstractMatrix , β, C:: AbstractMatrix )
158167 mA, nA = size (A)
159168 mB, nB = size (B)
@@ -165,13 +174,17 @@ function default_blasmul!(α, A::AbstractMatrix, B::AbstractMatrix, β, C::Abstr
165174 (iszero (mA) || iszero (nB)) && return C
166175 iszero (nA) && return C
167176
168- @inbounds for k in colsupport (A), j in rowsupport (B,rowsupport (A,k))
169- z2 = zero (A[k, 1 ]* B[1 , j] + A[k, 1 ]* B[1 , j])
170- Ctmp = convert (promote_type (eltype (C), typeof (z2)), z2)
171- @simd for ν = rowsupport (A,k) ∩ colsupport (B,j)
172- Ctmp = muladd (A[k, ν],B[ν, j],Ctmp)
177+ r = rowsupport (B,rowsupport (A,first (colsupport (A))))
178+ jindsid = all (k -> rowsupport (B,rowsupport (A,k)) == r, colsupport (A))
179+
180+ if jindsid
181+ for j in rowsupport (B,rowsupport (A,1 )), k in colsupport (A)
182+ _default_blasmul_loop! (α, A, B, β, C, k, j)
183+ end
184+ else
185+ for k in colsupport (A), j in rowsupport (B,rowsupport (A,k))
186+ _default_blasmul_loop! (α, A, B, β, C, k, j)
173187 end
174- C[k,j] = muladd (α,Ctmp, C[k,j])
175188 end
176189 C
177190end
0 commit comments