-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtester2.py
79 lines (62 loc) · 3.18 KB
/
tester2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
import scipy.sparse as sp
import cupy as cp
from Gordo_cuda.cusparse_module import solve_geam
def sort_csr(matrix):
row_start = 0
for i in range(len(matrix.indptr) - 1):
row_end = matrix.indptr[i + 1]
indices = matrix.indices[row_start:row_end]
data = matrix.data[row_start:row_end]
sorted_indices = np.argsort(indices)
matrix.indices[row_start:row_end] = indices[sorted_indices]
matrix.data[row_start:row_end] = data[sorted_indices]
row_start = row_end
# Parameters
np.random.seed(69) #some random seeds produce a row singularity and break stuff
n = 1000 # Size of the matrix (adjust as needed)
density = .01 # Density of non-zero elements (adjust as needed)
# Generate a sparse symmetric positive definite matrix A
A = sp.random(n, n, density=density, format='csr') + sp.eye(n,n)
A = A.T.dot(A) # Ensure A is symmetric and positive definite
# Convert A to cupyx if needed (for CUDA-based operations)
sort_csr(A)
A_data = cp.asarray(A.data)
A_indices = cp.asarray(A.indices)
A_indptr = cp.asarray(A.indptr)
# Generate a sparse symmetric positive definite matrix B
B = sp.random(n, n, density=density, format='csr') + sp.eye(n,n)
B = B.T.dot(B) # Ensure B is symmetric and positive definite
# Convert A and B to cupy arrays if needed
sort_csr(B)
B_data = cp.asarray(B.data)
B_indices = cp.asarray(B.indices)
B_indptr = cp.asarray(B.indptr)
# force the cupy arrays to store data contiguously
A_data = cp.ascontiguousarray(A_data)
A_indices = cp.ascontiguousarray(A_indices)
A_indptr = cp.ascontiguousarray(A_indptr)
# force the cupy arrays to store data contiguously
B_data = cp.ascontiguousarray(B_data)
B_indices = cp.ascontiguousarray(B_indices)
B_indptr = cp.ascontiguousarray(B_indptr)
# Now you can test your solve_cholesky function with A_cupy and b_cupy
# Assuming K_active is your cupyx CSR matrix
csrRowPtrA_ptr = A_indptr.__cuda_array_interface__['data'][0] # csrRowPtr
csrColIndA_ptr = A_indices.__cuda_array_interface__['data'][0] # csrColInd
csrValA_ptr = A_data.__cuda_array_interface__['data'][0] # csrValA
nnz_A = A_data.shape[0]
csrRowPtrB_ptr = B_indptr.__cuda_array_interface__['data'][0] # csrRowPtr
csrColIndB_ptr = B_indices.__cuda_array_interface__['data'][0] # csrColInd
csrValB_ptr = B_data.__cuda_array_interface__['data'][0] # csrValA
nnz_B = B_data.shape[0]
rows = A_indptr.shape[0] - 1
test=solve_geam(csrRowPtrA_ptr, csrColIndA_ptr, csrValA_ptr, nnz_A, csrRowPtrB_ptr, csrColIndB_ptr, csrValB_ptr, nnz_B, rows)
# Wrap the results of solve_geam into cupy.ndarray
nnzC = test[3]
# Example values
# Calculate the total size in bytes for the memory allocation
# Create a cupy.ndarray from the raw device pointer
C_indptr = cp.ndarray((n + 1,), dtype=cp.int32, memptr=cp.cuda.MemoryPointer(cp.cuda.UnownedMemory(test[0], cp.dtype(cp.int32).itemsize * nnzC, None), 0))
C_indices = cp.ndarray((nnzC,), dtype=cp.int32, memptr=cp.cuda.MemoryPointer(cp.cuda.UnownedMemory(test[1], cp.dtype(cp.int32).itemsize * nnzC, None), 0))
C_data = cp.ndarray((nnzC,), dtype=cp.float64, memptr=cp.cuda.MemoryPointer(cp.cuda.UnownedMemory(test[2], cp.dtype(cp.float64).itemsize * nnzC, None), 0))