diff --git a/ChangeLog b/ChangeLog index 3317b109ba..7eaf1c4eca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Apr 10, 2022, SuiteSparse 5.12.0 + + * GraphBLAS v7.0.3: see GraphBLAS/Doc/ChangeLog for details. + * performance: GrB_mxm, GrB_assign, and transpose + * bug fix: vector iterator for bitmap + * revised ACM TOMS submission: Doc/toms_parallel_grb2.pdf + * spec bug: GrB_Matrix_diag was implemented incorrectly, + thus requiring a version v7.x + Mar 14, 2022, SuiteSparse 5.11.0 * GraphBLAS v6.2.5: see GraphBLAS/Doc/ChangeLog for changes. diff --git a/GraphBLAS/CITATION.bib b/GraphBLAS/CITATION.bib new file mode 100644 index 0000000000..332ef8a441 --- /dev/null +++ b/GraphBLAS/CITATION.bib @@ -0,0 +1,41 @@ + +@article{GraphBLAS7, +author = {Davis, Timothy A.}, +title = {Algorithm 10xx: SuiteSparse:GraphBLAS: Graph Algorithms in the Language of Sparse Linear Algebra}, +year = {2022}, +abstract= {SuiteSparse:GraphBLAS is a full parallel implementation of the GraphBLAS +standard, which defines a set of sparse matrix operations on an extended +algebra of semirings using an almost unlimited variety of operators and types. +When applied to sparse adjacency matrices, these algebraic operations are +equivalent to computations on graphs. A description of the parallel +implementation of SuiteSparse:GraphBLAS is given, including its novel parallel +algorithms for sparse matrix multiply, addition, element-wise multiply, +submatrix extraction and assignment, and the GraphBLAS mask/accumulator +operation. Its performance is illustrated by solving the graph problems in the +GAP Benchmark and by comparing it with other sparse matrix libraries.}, +journal = {ACM Trans. Math. 
Softw.}, +month = {(under revision)}, +note={See GraphBLAS/Doc/toms_parallel_grb2.pdf}, +keywords = {GraphBLAS, Graph algorithms, sparse matrices} +} + +@article{10.1145/3322125, +author = {Davis, Timothy A.}, +title = {Algorithm 1000: SuiteSparse:GraphBLAS: Graph Algorithms in the Language of Sparse Linear Algebra}, +year = {2019}, +issue_date = {December 2019}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {45}, +number = {4}, +issn = {0098-3500}, +url = {https://doi.org/10.1145/3322125}, +doi = {10.1145/3322125}, +abstract = {SuiteSparse:GraphBLAS is a full implementation of the GraphBLAS standard, which defines a set of sparse matrix operations on an extended algebra of semirings using an almost unlimited variety of operators and types. When applied to sparse adjacency matrices, these algebraic operations are equivalent to computations on graphs. GraphBLAS provides a powerful and expressive framework for creating graph algorithms based on the elegant mathematics of sparse matrix operations on a semiring. An overview of the GraphBLAS specification is given, followed by a description of the key features and performance of its implementation in the SuiteSparse:GraphBLAS package.}, +journal = {ACM Trans. Math. 
Softw.}, +month = {dec}, +articleno = {44}, +numpages = {25}, +keywords = {GraphBLAS, Graph algorithms, sparse matrices} +} + diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt index 12be426d4f..1a06d2dbca 100644 --- a/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/CMakeLists.txt @@ -26,10 +26,10 @@ endif ( ) set ( CMAKE_MACOSX_RPATH TRUE ) # version of SuiteSparse:GraphBLAS -set ( GraphBLAS_DATE "Mar 14, 2022" ) -set ( GraphBLAS_VERSION_MAJOR 6 ) -set ( GraphBLAS_VERSION_MINOR 2 ) -set ( GraphBLAS_VERSION_SUB 5 ) +set ( GraphBLAS_DATE "Apr 8, 2022" ) +set ( GraphBLAS_VERSION_MAJOR 7 ) +set ( GraphBLAS_VERSION_MINOR 0 ) +set ( GraphBLAS_VERSION_SUB 3 ) message ( STATUS "Building SuiteSparse:GraphBLAS version: v" ${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB} " date: " ${GraphBLAS_DATE} ) @@ -511,6 +511,7 @@ if ( DEMO ) add_executable ( wildtype_demo "Demo/Program/wildtype_demo.c" ) add_executable ( reduce_demo "Demo/Program/reduce_demo.c" ) add_executable ( import_demo "Demo/Program/import_demo.c" ) + add_executable ( wathen_demo "Demo/Program/wathen_demo.c" ) # Libraries required for Demo programs target_link_libraries ( openmp_demo PUBLIC graphblas graphblasdemo ${GB_CUDA} ${GB_RMM} ) @@ -520,6 +521,7 @@ if ( DEMO ) target_link_libraries ( wildtype_demo PUBLIC graphblas ${GB_CUDA} ${GB_RMM} ) target_link_libraries ( reduce_demo PUBLIC graphblas ${GB_CUDA} ${GB_RMM} ) target_link_libraries ( import_demo PUBLIC graphblas graphblasdemo ${GB_CUDA} ${GB_RMM} ) + target_link_libraries ( wathen_demo PUBLIC graphblas graphblasdemo ${GB_CUDA} ${GB_RMM} ) else ( ) diff --git a/GraphBLAS/CUDA/CMakeLists.txt b/GraphBLAS/CUDA/CMakeLists.txt index de3472d8de..41228aecbe 100644 --- a/GraphBLAS/CUDA/CMakeLists.txt +++ b/GraphBLAS/CUDA/CMakeLists.txt @@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 3.20.1) project(GRAPHBLAS_CUDA VERSION 0.1 LANGUAGES CXX CUDA) -set(CMAKE_CUDA_FLAGS "-cudart static -lineinfo -G") +set(CMAKE_CUDA_FLAGS 
"-cudart=static -lineinfo -G") set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++17 -fPIC ") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBNCPUFEAT") @@ -16,7 +16,7 @@ set(CMAKE_C_STANDARD 11) message(STATUS "${CMAKE_CXX_FLAGS}") -file(GLOB GRAPHBLAS_CUDA_SOURCES "*.cu" "*.c") +file(GLOB GRAPHBLAS_CUDA_SOURCES "*.cu" "*.c" "*.cpp") add_library(graphblascuda SHARED ${GRAPHBLAS_CUDA_SOURCES} @@ -34,12 +34,18 @@ set(GRAPHBLAS_CUDA_INCLUDES message(STATUS "${GRAPHBLAS_CUDA_INCLUDES}") -target_include_directories(graphblascuda PUBLIC ${CUDA_INCLUDE_DIRECTORIES} ${GRAPHBLAS_CUDA_INCLUDES}) +target_include_directories(graphblascuda PUBLIC ${CUDAToolkit_INCLUDE_DIRS} ${GRAPHBLAS_CUDA_INCLUDES}) set_target_properties(graphblascuda PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(graphblascuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) set_target_properties(graphblascuda PROPERTIES CUDA_ARCHITECTURES "75") -target_link_libraries(graphblascuda nvrtc cuda) +target_link_libraries(graphblascuda CUDA::nvrtc CUDA::cudart_static) + +install ( TARGETS graphblascuda + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) + # 1. Execute enumify/stringify/jitify logic to compile ptx kernels and compile/link w/ relevant *.cu files. 
@@ -77,8 +83,8 @@ foreach(var ${CUDA_TEST_SUITES}) # easily ignore them from the build add_custom_command( OUTPUT - ${CMAKE_CURRENT_BINARY_DIR}/${var}_${semiring}_${kernel}_test_instances.hpp - ${CMAKE_CURRENT_BINARY_DIR}/${var}_${semiring}_${kernel}_cuda_tests.cu + ${CMAKE_CURRENT_BINARY_DIR}/${var}_${semiring}_test_instances.hpp + ${CMAKE_CURRENT_BINARY_DIR}/${var}_${semiring}_cuda_tests.cpp DEPENDS jitFactory.hpp COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/test/testGen_cmake.py "\"${CMAKE_CURRENT_SOURCE_DIR}\"" "\"${var}\"" "\"${CUDA_TEST_MONOIDS}\"" @@ -87,7 +93,7 @@ foreach(var ${CUDA_TEST_SUITES}) ) # Construct final list of files to compile (in parallel) - list(APPEND CUDA_TEST_CPP_FILES ${CMAKE_CURRENT_BINARY_DIR}/${var}_${semiring}_${kernel}_cuda_tests.cu) + list(APPEND CUDA_TEST_CPP_FILES ${CMAKE_CURRENT_BINARY_DIR}/${var}_${semiring}_cuda_tests.cpp) endforeach() endforeach() endforeach() @@ -100,7 +106,13 @@ FetchContent_Declare( ) # For Windows: Prevent overriding the parent project's compiler/linker settings set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -FetchContent_MakeAvailable(googletest) +FetchContent_GetProperties(googletest) +if(NOT googletest_POPULATED) + FetchContent_Populate(googletest) + add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL) +endif() + +#FetchContent_MakeAvailable(googletest EC) #file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external_includes) @@ -131,14 +143,18 @@ add_dependencies(graphblascuda_test gtest_main) add_dependencies(graphblascuda_test rmm_wrap) target_link_libraries(graphblascuda_test + PUBLIC graphblas graphblascuda - gtest_main - nvrtc - cuda - ${ADDITIONAL_DEPS}) - -target_include_directories(graphblascuda_test PUBLIC + CUDA::cudart_static + CUDA::nvrtc + ${ADDITIONAL_DEPS} + PRIVATE + gtest_main) + +target_include_directories(graphblascuda_test + PUBLIC ${ADDITIONAL_INCLUDES} - ${CUDA_INCLUDE_DIRECTORIES} + ${CUDAToolkit_INCLUDE_DIRS} ${GRAPHBLAS_CUDA_INCLUDES}) + diff 
--git a/GraphBLAS/CUDA/GB_AxB_dot3_cuda.cpp b/GraphBLAS/CUDA/GB_AxB_dot3_cuda.cpp new file mode 100644 index 0000000000..376a934088 --- /dev/null +++ b/GraphBLAS/CUDA/GB_AxB_dot3_cuda.cpp @@ -0,0 +1,335 @@ +//------------------------------------------------------------------------------ +// GB_AxB_dot3_cuda: compute C = A'*B in parallel, on the GPU(s) +//------------------------------------------------------------------------------ + +// SPDX-License-Identifier: Apache-2.0 +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +//------------------------------------------------------------------------------ + +// This function only computes C=A'*B on the GPUs. The mask must be +// present, and not complemented. The mask is always applied. + +extern "C" +{ + #include "GB_mxm.h" +} + +#include "GB_cuda.h" + +#include "GB_jit_cache.h" + +#include "jitFactory.hpp" +#include "GB_cuda_type_wrap.hpp" + +template +void print_array(void *arr, I size, const char *name) { + std::cout << "Printing " << name << std::endl; + for(I i = 0; i < size; ++i) { + std::cout << static_cast(arr)[i] << ", "; + } + std::cout << std::endl << "Done." 
<< std::endl; +} + + +#undef GB_FREE_WORKSPACE +#define GB_FREE_WORKSPACE \ +{ \ + if (Nanobuckets != NULL) rmm_wrap_free (Nanobuckets) ; Nanobuckets = NULL ; \ + if (Blockbucket != NULL) rmm_wrap_free (Blockbucket) ; Blockbucket = NULL ; \ + if (Bucket != NULL) rmm_wrap_free (Bucket); Bucket = NULL ; \ + if (Bucketp != NULL) rmm_wrap_free (Bucketp); Bucketp = NULL ; \ + if (offset != NULL) rmm_wrap_free (offset); offset = NULL ; \ +} + +#undef GB_FREE_ALL +#define GB_FREE_ALL \ +{ \ + GB_FREE_WORKSPACE ; \ + GB_Matrix_free (&C) ; \ +} + + +GrB_Info GB_AxB_dot3_cuda // C = A'*B using dot product method +( + GrB_Matrix C, // output matrix + const GrB_Matrix M, // mask matrix + const bool Mask_struct, // if true, use the only structure of M + const GrB_Matrix A, // input matrix + const GrB_Matrix B, // input matrix + const GrB_Semiring semiring, // semiring that defines C=A*B + const bool flipxy, // if true, do z=fmult(b,a) vs fmult(a,b) + GB_Context Context +) +{ + + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + printf ("HERE IN cuda dot3, mask_struct is %d\n", Mask_struct) ; + + // when CUDA is enabled, no static headers are used in all of GraphBLAS + GrB_Info info ; + ASSERT (C != NULL && !(C->static_header)) ; + ASSERT (M != NULL && !(M->static_header)) ; + ASSERT (A != NULL && !(A->static_header)) ; + ASSERT (B != NULL && !(B->static_header)) ; + + ASSERT_MATRIX_OK (M, "M for dot3 cuda A'*B", GB2) ; + ASSERT_MATRIX_OK (A, "A for dot3 cuda A'*B", GB2) ; + ASSERT_MATRIX_OK (B, "B for dot3 cuda A'*B", GB2) ; + + ASSERT (!GB_PENDING (M)) ; + ASSERT (GB_JUMBLED_OK (M)) ; + ASSERT (!GB_ZOMBIES (M)) ; + + ASSERT (!GB_PENDING (A)) ; + ASSERT (!GB_JUMBLED (A)) ; + ASSERT (!GB_ZOMBIES (A)) ; + + ASSERT (!GB_PENDING (B)) ; + ASSERT (!GB_ZOMBIES (B)) ; + ASSERT (!GB_JUMBLED (B)) ; + + ASSERT_SEMIRING_OK (semiring, "semiring for dot3 numeric 
A'*B", GB2) ; + + ASSERT (A->vlen == B->vlen) ; + GBURBLE ("(GPU dot3) ") ; + + //-------------------------------------------------------------------------- + // initializations + //-------------------------------------------------------------------------- + + int64_t *Nanobuckets = NULL, *Blockbucket = NULL ; + int64_t *Bucket = NULL; + int64_t *Bucketp = NULL; + int64_t *offset = NULL; + + int device = -1; + + CHECK_CUDA_SIMPLE(cudaSetDevice( 0 )); + CHECK_CUDA_SIMPLE(cudaGetDevice(&device)); + + //-------------------------------------------------------------------------- + // get M + //-------------------------------------------------------------------------- + + const int64_t mvlen = M->vlen ; + const int64_t mvdim = M->vdim ; + const int64_t mnz = GB_nnz (M) ; + const int64_t mnvec = M->nvec ; + const bool M_is_hyper = GB_IS_HYPERSPARSE( M ) ; + + const int64_t anz = GB_nnz (A) ; + const int64_t anvec = A->nvec ; + + const int64_t bnz = GB_nnz (B) ; + const int64_t bnvec = B->nvec ; + + //-------------------------------------------------------------------------- + // allocate C, the same size and # of entries as M + //-------------------------------------------------------------------------- + + // FUTURE: ctype need not be the op->ztype + GrB_Type ctype = semiring->add->op->ztype ; + int64_t cvlen = mvlen ; + int64_t cvdim = mvdim ; + int64_t cnz = mnz ; + int64_t cnvec = mnvec ; + + int sparsity_M = (M_is_hyper) ? 
GxB_HYPERSPARSE : GxB_SPARSE ; + info = GB_new_bix (&C, // sparse or hyper (from M), existing header + ctype, cvlen, cvdim, GB_Ap_malloc, true, + sparsity_M, false, M->hyper_switch, cnvec, + cnz+1, // add one to cnz for GB_cumsum of Cwork + true, /* not iso: */ false, Context) ; + + if (info != GrB_SUCCESS) + { + // out of memory + GB_FREE_ALL ; + return (info) ; + } + + //int64_t *Citemp = C->i ; + //auto *Cxtemp = C->x ; + //cudaMalloc ((void**) &(C->i), cnz * sizeof( int64_t) ); + //cudaMalloc ((void**) &(C->x), cnz * C->type->size ); + CHECK_CUDA_SIMPLE(cudaMemAdvise( C->i, (cnz+1) * sizeof ( int64_t), cudaMemAdviseSetPreferredLocation, device)); + CHECK_CUDA_SIMPLE(cudaMemAdvise( C->x, (cnz+1) * C->type->size , cudaMemAdviseSetPreferredLocation, device)); + + + //-------------------------------------------------------------------------- + // copy Mp and Mh into C + //-------------------------------------------------------------------------- + + CHECK_CUDA_SIMPLE(cudaMemcpy (C->p, M->p, (cnvec+1) * sizeof (int64_t), cudaMemcpyDefault)) ; + if (M_is_hyper) + { + // FIXME + CHECK_CUDA_SIMPLE(cudaMemcpy (C->h, M->h, cnvec * sizeof (int64_t), cudaMemcpyDefault)) ; + } + + C->magic = GB_MAGIC ; + C->nvec_nonempty = M->nvec_nonempty ; + C->nvec = M->nvec ; + // the dot3 CUDA kernel will produce C->i with jumbled indices + C->jumbled = true ; + + GBURBLE ("(GPU C created and copied from M) ") ; + //-------------------------------------------------------------------------- + // stringify the semiring and the mask + //-------------------------------------------------------------------------- + + GB_cuda_semiring_factory mysemiring = GB_cuda_semiring_factory ( ) ; + + // (1) create the semiring code and name + mysemiring.semiring_factory ( semiring, flipxy, + ctype, M->type, A->type, B->type, Mask_struct, // matrix types + false, GB_sparsity(C), GB_sparsity(M), GB_sparsity(A), GB_sparsity(B) ) ; + + // (2) ensure the jitifier has "GB_semiring_[mysemiring.sr_code].h" + 
jit::GBJitCache filecache = jit::GBJitCache::Instance() ; + filecache.getFile (mysemiring) ; + + GBURBLE ("(GPU stringified) ") ; + //-------------------------------------------------------------------------- + // construct the tasks for phase1 and phase2 + //-------------------------------------------------------------------------- + + // on the CPU: nthreads = GB_nthreads (cnz, chunk, nthreads_max) ; + // on the GPU: + phase1launchFactory p1lf(mysemiring); + phase2launchFactory p2lf; + phase2endlaunchFactory p2elf; + + + // # of threads in phase1 and phase2 kernel launches must be the same + int nthrd = p2lf.get_threads_per_block(); + int ntasks = p2elf.get_number_of_blocks(M); + + int64_t nanobuckets_size = NBUCKETS * nthrd * ntasks; + int64_t blockbuckets_size = NBUCKETS * ntasks; + + Nanobuckets = (int64_t*)rmm_wrap_malloc(nanobuckets_size * sizeof (int64_t)); + Blockbucket = (int64_t*)rmm_wrap_malloc(blockbuckets_size * sizeof (int64_t)); + Bucketp = (int64_t*)rmm_wrap_malloc((NBUCKETS+1) * sizeof (int64_t)); + Bucket = (int64_t*)rmm_wrap_malloc(mnz * sizeof (int64_t)); + offset = (int64_t*)rmm_wrap_malloc(NBUCKETS * sizeof (int64_t)); + + CHECK_CUDA_SIMPLE(cudaMemset(Nanobuckets, 0, nanobuckets_size * sizeof(int64_t))); + CHECK_CUDA_SIMPLE(cudaMemset(Blockbucket, 0, blockbuckets_size * sizeof(int64_t))); + CHECK_CUDA_SIMPLE(cudaMemset(Bucketp, 0, (NBUCKETS+1) * sizeof(int64_t))); + CHECK_CUDA_SIMPLE(cudaMemset(Bucket, 0, mnz * sizeof(int64_t))); + CHECK_CUDA_SIMPLE(cudaMemset(offset, 0, NBUCKETS * sizeof(int64_t))); + + //-------------------------------------------------------------------------- + // phase1 and phase2: place each C(i,j) in a bucket + //-------------------------------------------------------------------------- + + CHECK_CUDA_SIMPLE(cudaMemAdvise( Bucketp, (NBUCKETS+1) * sizeof ( int64_t), cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId)); + CHECK_CUDA_SIMPLE(cudaMemAdvise( Bucketp, (NBUCKETS+1) * sizeof ( int64_t), 
cudaMemAdviseSetAccessedBy, device)); + + // offset was already allocated and zeroed above; reallocating it here leaked that buffer + CHECK_CUDA_SIMPLE(cudaMemAdvise( offset, NBUCKETS * sizeof ( int64_t), cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId)); + CHECK_CUDA_SIMPLE(cudaMemAdvise( offset, NBUCKETS * sizeof ( int64_t), cudaMemAdviseSetAccessedBy, device)); + + memset( offset, 0, NBUCKETS * sizeof(int64_t) ); + + //-------------------------------------------------------------------------- + // Pre-fetch arrays that will be used on the device + //-------------------------------------------------------------------------- + + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( M->p, (mnvec+1) * sizeof (int64_t), device, NULL)) ; //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( M->i, mnz * sizeof (int64_t), device, NULL )) ; //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( M->x, mnz * M->type->size, device, NULL )) ; //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( C->i, (cnz+1) * sizeof (int64_t), device, NULL )); //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( C->x, (cnz+1) * C->type->size, device, NULL )); //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( A->p, (anvec+1) * sizeof (int64_t), device, NULL)); // stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( A->i, anz * sizeof (int64_t), device, NULL )) ; //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( A->x, anz * A->type->size, device, NULL )) ; //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( B->p, (bnvec+1) * sizeof (int64_t), device, NULL)); //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( B->i, bnz * sizeof (int64_t), device, NULL )); //stream_data) ; + CHECK_CUDA_SIMPLE(cudaMemPrefetchAsync( B->x, bnz * B->type->size, device, NULL )); //stream_data) ; + + // The work to compute C(i,j) is held in Ci [p], if C(i,j) appears in + // as the pth entry in C. 
+ + //---------------------------------------------------------------------- + // phase1: assign each C(i,j) to a bucket, and count them + //---------------------------------------------------------------------- + + GBURBLE ("(GPU phase1 start) ") ; + + p1lf.jitGridBlockLaunch(Nanobuckets, Blockbucket, C, M, A, B); + + GBURBLE ("(GPU phase1 done) ") ; + + print_array(Nanobuckets, nanobuckets_size, "Nanobuckets"); + print_array(Blockbucket, blockbuckets_size , "Blockbucket"); + + //---------------------------------------------------------------------- + // phase2: cumsum across the blockbuckets, propagate to thread level + //---------------------------------------------------------------------- + + GBURBLE ("(GPU phase2 start) ") ; + + p2lf.jitGridBlockLaunch(Blockbucket, offset, M); + + int64_t s= 0; + for ( int bucket = 0 ; bucket < NBUCKETS+1; ++bucket) + { + const int64_t count = (bucket < NBUCKETS) ? offset[bucket] : 0 ; // offset has only NBUCKETS entries; the last pass just closes the cumsum + Bucketp[bucket] = s; s+= count; + printf("bucketp[%d] = %ld, offset=%ld\n", bucket, Bucketp[bucket], count); + } + + GBURBLE ("(GPU phase2 done) ") ; + + GBURBLE ("(GPU phase2end start) ") ; + + p2elf.jitGridBlockLaunch(Nanobuckets, Blockbucket, + Bucketp, Bucket, offset, C, M); + + GBURBLE ("(GPU phase2end done) ") ; + + print_array(Bucket, mnz , "Bucket"); + print_array(M->i, mnz , "M->i"); + print_array(C->i, mnz , "C->i"); + + //---------------------------------------------------------------------- + // phase3: do the numerical work + //---------------------------------------------------------------------- + + print_array(Bucketp, NBUCKETS + 1 , "Bucketp"); + C->nzombies = Bucketp[1]; //set pre-zombie counts + printf("pre-kernel C->nzombies=%ld\n", C->nzombies); + + for ( int bucket = 1 ; bucket < NBUCKETS; ++bucket) + { + int64_t start = Bucketp[bucket]; + int64_t end = Bucketp[bucket+1]; + + + if(end - start > 0) { + printf("Executing bucket: %d with %ld edges\n", bucket, end-start); + // TODO: We might want to consider submitting these in different cuda streams (maybe use cuda 
stream pool?) + phase3launchFactory p3lf(mysemiring, (GB_bucket_code)bucket); + p3lf.jitGridBlockLaunch(start, end, Bucketp, Bucket, C, M, A, B); + } else { + printf("Skipping bucket %d, no work to do\n", bucket); + } + + GBURBLE ("(GPU phase3 done ) ") ; + } + C->nzombies += Bucketp[1]; + printf("C->p[0]=%ld\n", C->p[0]); + printf("C->p[1]=%ld\n", C->p[1]); + printf("C->nzombies=%ld\n", C->nzombies); + + GB_FREE_WORKSPACE ; + return GrB_SUCCESS; +} + diff --git a/GraphBLAS/CUDA/GB_AxB_dot3_cuda.cu b/GraphBLAS/CUDA/GB_AxB_dot3_cuda.cu deleted file mode 100644 index 1451eac3f5..0000000000 --- a/GraphBLAS/CUDA/GB_AxB_dot3_cuda.cu +++ /dev/null @@ -1,695 +0,0 @@ -//------------------------------------------------------------------------------ -// GB_AxB_dot3_cuda: compute C = A'*B in parallel, on the GPU(s) -//------------------------------------------------------------------------------ - -// SPDX-License-Identifier: Apache-2.0 -// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved. -// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. - -//------------------------------------------------------------------------------ - -// This function only computes C=A'*B on the GPUs. The mask must be -// present, and not complemented. The mask is always applied. 
- -extern "C" -{ - #include "GB_mxm.h" -} -#include "GB_cuda.h" - - - -#include "GB_callback.hpp" -#include "GB_jit_launcher.h" -#include "GB_jit_cache.h" - -const std::vector header_names ={}; - -#undef GB_FREE_WORKSPACE -#define GB_FREE_WORKSPACE \ -{ \ - if (Nanobuckets != NULL) cudaFree (Nanobuckets) ; Nanobuckets = NULL ; \ - if (Blockbucket != NULL) cudaFree (Blockbucket) ; Blockbucket = NULL ; \ - if (Bucket != NULL) cudaFree (Bucket); Bucket = NULL ; \ - if (Bucketp != NULL) cudaFree (Bucketp); Bucketp = NULL ; \ - if (offset != NULL) cudaFree (offset); offset = NULL ; \ -} - -#undef GB_FREE_ALL -#define GB_FREE_ALL \ -{ \ - GB_FREE_WORKSPACE ; \ - GB_Matrix_free (&C) ; \ -} - -GrB_Info GB_AxB_dot3_cuda // C = A'*B using dot product method -( - GrB_Matrix C, // output matrix - const GrB_Matrix M, // mask matrix - const bool Mask_struct, // if true, use the only structure of M - const GrB_Matrix A, // input matrix - const GrB_Matrix B, // input matrix - const GrB_Semiring semiring, // semiring that defines C=A*B - const bool flipxy, // if true, do z=fmult(b,a) vs fmult(a,b) - GB_Context Context -) -{ - - //-------------------------------------------------------------------------- - // check inputs - //-------------------------------------------------------------------------- - - // when CUDA is enabled, no static headers are used in all of GraphBLAS - GrB_Info info ; - ASSERT (C != NULL && !(C->static_header)) ; - ASSERT (M != NULL && !(M->static_header)) ; - ASSERT (A != NULL && !(A->static_header)) ; - ASSERT (B != NULL && !(B->static_header)) ; - - ASSERT_MATRIX_OK (M, "M for dot3 cuda A'*B", GB2) ; - ASSERT_MATRIX_OK (A, "A for dot3 cuda A'*B", GB2) ; - ASSERT_MATRIX_OK (B, "B for dot3 cuda A'*B", GB2) ; - - ASSERT (!GB_PENDING (M)) ; - ASSERT (GB_JUMBLED_OK (M)) ; - ASSERT (!GB_ZOMBIES (M)) ; - - ASSERT (!GB_PENDING (A)) ; - ASSERT (!GB_JUMBLED (A)) ; - ASSERT (!GB_ZOMBIES (A)) ; - - ASSERT (!GB_PENDING (B)) ; - ASSERT (!GB_ZOMBIES (B)) ; - ASSERT 
(!GB_JUMBLED (B)) ; - - ASSERT_SEMIRING_OK (semiring, "semiring for dot3 numeric A'*B", GB2) ; - - ASSERT (A->vlen == B->vlen) ; - GBURBLE ("(GPU dot3) ") ; - - //-------------------------------------------------------------------------- - // initializations - //-------------------------------------------------------------------------- - - int ntasks = 0, number_of_sms = 0 ; - int64_t *Nanobuckets = NULL, *Blockbucket = NULL ; - int64_t *Bucket = NULL; - int64_t *Bucketp = NULL; - int64_t *offset = NULL; - - int device = -1; - - cudaSetDevice( 0 ) ; - - cudaGetDevice(&device); - - //-------------------------------------------------------------------------- - // get M - //-------------------------------------------------------------------------- - - const int64_t mvlen = M->vlen ; - const int64_t mvdim = M->vdim ; - const int64_t mnz = GB_nnz (M) ; - const int64_t mnvec = M->nvec ; - const bool M_is_hyper = GB_IS_HYPERSPARSE( M ) ; - - const int64_t anz = GB_nnz (A) ; - const int64_t anvec = A->nvec ; - - const int64_t bnz = GB_nnz (B) ; - const int64_t bnvec = B->nvec ; - - //-------------------------------------------------------------------------- - // allocate C, the same size and # of entries as M - //-------------------------------------------------------------------------- - - // FUTURE: ctype need not be the op->ztype - GrB_Type ctype = semiring->add->op->ztype ; - int64_t cvlen = mvlen ; - int64_t cvdim = mvdim ; - int64_t cnz = mnz ; - int64_t cnvec = mnvec ; - - int sparsity_M = (M_is_hyper) ? 
GxB_HYPERSPARSE : GxB_SPARSE ; - info = GB_new_bix (&C, // sparse or hyper (from M), existing header - ctype, cvlen, cvdim, GB_Ap_malloc, true, - sparsity_M, false, M->hyper_switch, cnvec, - cnz+1, // add one to cnz for GB_cumsum of Cwork - true, /* not iso: */ false, Context) ; - - if (info != GrB_SUCCESS) - { - // out of memory - GB_FREE_ALL ; - return (info) ; - } - - //int64_t *Citemp = C->i ; - //auto *Cxtemp = C->x ; - //cudaMalloc ((void**) &(C->i), cnz * sizeof( int64_t) ); - //cudaMalloc ((void**) &(C->x), cnz * C->type->size ); - cudaMemAdvise( C->i, cnz * sizeof ( int64_t), cudaMemAdviseSetPreferredLocation, device); - cudaMemAdvise( C->x, cnz * C->type->size , cudaMemAdviseSetPreferredLocation, device); - - - //-------------------------------------------------------------------------- - // copy Mp and Mh into C - //-------------------------------------------------------------------------- - - //cudaMemcpy (Cp, Mp, (cnvec+1) * sizeof (int64_t), cudaMemcpyDefault) ; - if (M_is_hyper) - { - // FIXME - //cudaMemcpy (Ch, Mh, cnvec * sizeof (int64_t), cudaMemcpyDefault) ; - } - C->magic = GB_MAGIC ; - C->nvec_nonempty = M->nvec_nonempty ; - C->nvec = M->nvec ; - - // the dot3 CUDA kernel will produce C->i with jumbled indices - C->jumbled = true ; - - GBURBLE ("(GPU C created and copied from M) ") ; - //-------------------------------------------------------------------------- - // stringify the semiring and the mask - //-------------------------------------------------------------------------- - - GB_cuda_semiring_factory mysemiring = GB_cuda_semiring_factory ( ) ; - - // (1) create the semiring code and name - mysemiring.semiring_factory ( semiring, flipxy, - ctype, A->type, B->type, M->type, Mask_struct, // matrix types - false, GB_sparsity(C), GB_sparsity(M), GB_sparsity(A), GB_sparsity(B) ) ; - - // (2) ensure the jitifier has "GB_semiring_[mysemiring.sr_code].h" - jit::GBJitCache filecache = jit::GBJitCache::Instance() ; - filecache.getFile (mysemiring) 
; - - GBURBLE ("(GPU stringified) ") ; - //-------------------------------------------------------------------------- - // construct the tasks for phase1 and phase2 - //-------------------------------------------------------------------------- - - // on the CPU: nthreads = GB_nthreads (cnz, chunk, nthreads_max) ; - // on the GPU: - - // # of threads in phase1 and phase2 kernel launches must be the same - #define chunksize 128 - #define SYMBOLIC_PHASE_NTHREADS 32 - #define NBUCKETS (GB_BUCKET_MERGEPATH + 1) - - number_of_sms = GB_Global_gpu_sm_get (0) ; - // C and M have cnz entries, so create ... - //ntasks = ( (mnvec +7)/8 + SYMBOLIC_PHASE_NTHREADS -1 )/SYMBOLIC_PHASE_NTHREADS; - ntasks = ( mnz +chunksize -1)/chunksize; - // Idea is to have each task work on a continguous block of columns of C - ntasks = GB_IMIN( ntasks, 128*number_of_sms) ; // ntasks will be grid.x - - GBURBLE ("(GPU mnz=%ld mnvec=%ld blockDim=32, nblock= %d) ", mnz, mnvec, ntasks ) ; - - std::cout<< "ntasks, nthreads = " <p, (mnvec+1) * sizeof (int64_t), cudaMemAdviseSetPreferredLocation, device) ; - cudaMemAdvise( M->i, mnz * sizeof ( int64_t), cudaMemAdviseSetPreferredLocation, device); - cudaMemAdvise( M->x, mnz * M->type->size, cudaMemAdviseSetPreferredLocation,device) ; - - cudaMemAdvise( M->p, (mnvec+1) * sizeof (int64_t), cudaMemAdviseSetReadMostly, device) ; - cudaMemAdvise( M->i, mnz * sizeof (int64_t), cudaMemAdviseSetReadMostly, device) ; - cudaMemAdvise( M->x, mnz * M->type->size, cudaMemAdviseSetReadMostly,device) ; - */ - - cudaMemPrefetchAsync( M->p, (mnvec+1) * sizeof (int64_t), device, NULL) ; //stream_data) ; - cudaMemPrefetchAsync( M->i, mnz * sizeof (int64_t), device, NULL ) ; //stream_data) ; - cudaMemPrefetchAsync( M->x, mnz * M->type->size, device, NULL ) ; //stream_data) ; - /* - cudaMemAdvise( C->p, (mnvec+1) * sizeof (int64_t), cudaMemAdviseSetReadMostly, device) ; - cudaMemAdvise( C->i, mnz * sizeof (int64_t), cudaMemAdviseSetReadMostly, device) ; - cudaMemAdvise( 
C->x, mnz * C->type->size, cudaMemAdviseSetReadMostly,device) ; - */ - //cudaMemPrefetchAsync( C->p, (mnvec+1) * sizeof (int64_t), device, NULL) ; //stream_data) ; - cudaMemPrefetchAsync( C->i, mnz * sizeof (int64_t), device, NULL ); //stream_data) ; - cudaMemPrefetchAsync( C->x, mnz * C->type->size, device, NULL ); //stream_data) ; - - /* - cudaMemAdvise( A->p, (anvec+1) * sizeof (int64_t), cudaMemAdviseSetReadMostly, device) ; - cudaMemAdvise( A->i, anz * sizeof (int64_t), cudaMemAdviseSetReadMostly, device) ; - cudaMemAdvise( A->x, anz * A->type->size, cudaMemAdviseSetReadMostly,device) ; - */ - cudaMemPrefetchAsync( A->p, (anvec+1) * sizeof (int64_t), device, NULL); // stream_data) ; - cudaMemPrefetchAsync( A->i, anz * sizeof (int64_t), device, NULL ) ; //stream_data) ; - cudaMemPrefetchAsync( A->x, anz * A->type->size, device, NULL ) ; //stream_data) ; - - /* - cudaMemAdvise( B->p, (bnvec+1) * sizeof (int64_t), cudaMemAdviseSetReadMostly, device) ; - cudaMemAdvise( B->i, bnz * sizeof (int64_t), cudaMemAdviseSetReadMostly, device) ; - cudaMemAdvise( B->x, bnz * B->type->size, cudaMemAdviseSetReadMostly, device) ; - */ - cudaMemPrefetchAsync( B->p, (bnvec+1) * sizeof (int64_t), device, NULL) ; //stream_data) ; - cudaMemPrefetchAsync( B->i, bnz * sizeof (int64_t), device, NULL ) ; //stream_data) ; - cudaMemPrefetchAsync( B->x, bnz * B->type->size, device, NULL ) ; //stream_data) ; - - - - // The work to compute C(i,j) is held in Ci [p], if C(i,j) appears in - // as the pth entry in C. 
- - - cudaStream_t stream_AxB = NULL; - //cudaStreamCreate ( &stream_AxB); - //---------------------------------------------------------------------- - // phase1: assign each C(i,j) to a bucket, and count them - //---------------------------------------------------------------------- - dim3 grid( ntasks) ; - - dim3 block( SYMBOLIC_PHASE_NTHREADS ) ; - - std::string base_name = "GB_jit_AxB_dot3_"; - std::string Opname = "phase1_" ; - - // create a single string of 4 lines, containining the following, for some - // semiring code. Note that R "eats" the left and right parentheses. - /* - phase1_program - #include "GB_semiring_23030928029.h" - #include "GB_jit_AxB_dot3_phase1.cu" - */ - // where GB_semiring_23030928029.h is mysemiring.filename - - /** - * JIT Instantiation Calls - */ - std::stringstream phase1_program ; - phase1_program << - R"(phase1_program - #include ")" << mysemiring.filename << R"(" - #include "GB_jit_AxB_phase1.cu" - )"; - - // dump it: - std::cout << phase1_program.str() ; - - jitify::experimental::KernelLauncher phase1Kernel = - jit::launcher( base_name + Opname + mysemiring.filename, - phase1_program.str(), - header_names, - jit::compiler_flags, - dummy_callback, - stream_AxB) - .set_kernel_inst("GB_AxB_cuda_phase1", - {M->type->name}) - .configure(grid, block); - - //---------------------------------------------------------------------- - // phase2: cumsum across the blockbuckets, propagate to thread level - //---------------------------------------------------------------------- - - // p2grid is for phase2, which uses # of tasks (aka thread blocks) - // equal to ceil (ntasks / SYMBOLIC_PHASE_NTHREADS). 
- int p2ntasks = ( (ntasks + SYMBOLIC_PHASE_NTHREADS -1) / (SYMBOLIC_PHASE_NTHREADS) ) ; - dim3 p2grid( p2ntasks ) ; - - Opname = "phase2"; - - std::stringstream phase2_program ; - phase2_program << - R"(phase2_program - #include ")" << mysemiring.filename << R"(" - #include "GB_jit_AxB_dot3_phase2.cu" - )" ; - // dump it: - std::cout << phase2_program.str() ; - - jitify::experimental::KernelLauncher phase2Kernel = - jit::launcher( base_name + Opname + mysemiring.filename, - phase2_program.str(), - header_names, - jit::compiler_flags, - dummy_callback) - //stream_AxB) - .set_kernel_inst("GB_AxB_dot3_phase2", - {}) - .configure(p2grid, block); - - Opname = "phase2"; - jitify::experimental::KernelLauncher phase2endKernel = - jit::launcher( base_name + Opname + mysemiring.filename, - phase2_program.str(), - header_names, - jit::compiler_flags, - dummy_callback) - //stream_AxB) - .set_kernel_inst("GB_AxB_dot3_phase2end", - {}) - .configure(grid, block); - - - /** - * JIT Kernel Launch - */ - phase1Kernel.launch( - Nanobuckets, // array of size NBUCKETS-blockDim.x-by-gridDim.x - Blockbucket, // bucket counts, of size NBUCKETS-by-gridDim.x - // input/output: - C, // final output matrix - // inputs, not modified: - M, // mask matrix - A, // input matrix - B // input matrix - ); - - - // cudaDeviceSynchronize(); - - - GBURBLE ("(GPU phase1 done) ") ; - //for (int i = 0; i< cnz; i++){ - // printf("C[%d] = %ld\n", i , Ci[i]); - //} - //---------------------------------------------------------------------- - // phase2: cumsum across the blockbuckets, propagate to thread level - //---------------------------------------------------------------------- - int nblock = ntasks; // # of tasks from phase1 - - phase2Kernel.launch( // input - Nanobuckets, // array of size NBUCKETS-blockDim.x-by-gridDim.x - Blockbucket, // bucket counts, of size NBUCKETS-by-gridDim.x - // input/output: - Bucketp, // global bucket cumsum, of size NBUCKETS+1 - Bucket, // global buckets, of size cnz (== 
mnz) - offset, - nblock - ); - - cudaDeviceSynchronize(); - //cudaMemPrefetchAsync( offset, (NBUCKETS) * sizeof (int64_t), cudaCpuDeviceId, NULL) ; - - int64_t s= 0; - for ( int bucket = 0 ; bucket < NBUCKETS+1; ++bucket) - { - Bucketp[bucket] = s; - s+= offset[bucket]; - //printf("bucketp[%d] = %ld\n", bucket, Bucketp[bucket]); - } - - GBURBLE ("(GPU phase2 done) ") ; - - phase2endKernel.launch( // input - Nanobuckets, // array of size NBUCKETS-blockDim.x-by-gridDim.x - Blockbucket, // bucket counts, of size NBUCKETS-by-gridDim.x - // input/output: - Bucketp, // global bucket cumsum, of size NBUCKETS+1 - Bucket, // global buckets, of size cnz (== mnz) - offset, - C, // final output matrix - // inputs, not modified: - cnz // number of entries in mask and output matrix - ); - - cudaDeviceSynchronize(); - - GBURBLE ("(GPU phase2end done) ") ; - /* - for (int i = 0; i< cnz; i++){ - printf("C[%d],Bucket = %ld,%ld\n", i , Ci[i], Bucket[i]); - } - */ - - //---------------------------------------------------------------------- - // phase3: do the numerical work - //---------------------------------------------------------------------- - - C->nzombies = Bucketp[1]; //set pre-zombie counts - - for ( int bucket = 1 ; bucket < NBUCKETS; ++bucket) - { - int sz = 0 ; - - int64_t start = Bucketp[bucket]; - int64_t end = Bucketp[bucket+1]; - - //if( (end-start>0) && (start == Bucketp[1]) ) start = Bucketp[0]; //add in zombie slots - - int64_t Cnz = end- start; - - int gridsz, blocksz; - - //Nothing to do, next bucket - if ( Cnz == 0 ) continue; - - GBURBLE ("\n\n(GPU phase3 bucket,bucketsize= %d,%ld) ",bucket,Cnz) ; - std::stringstream phase3_program ; - - switch (bucket) - { - - //-------------------------------------------------------------- - // not a bucket ... 
bring out your dead: - //-------------------------------------------------------------- - - case GB_BUCKET_ZOMBIE : // C(i,j) is a zombie (not a bucket) - break ; - - //-------------------------------------------------------------- - // CUDA kernel: dndn, handles a single bucket: - //-------------------------------------------------------------- - - // both A(:,i) and B(:,j) are dense - case GB_BUCKET_DNDN : - Opname = "phase3_dndn" ; - - blocksz = 32; - gridsz = ( Cnz -1 + blocksz)/blocksz; - break ; - - //-------------------------------------------------------------- - // CUDA kernel: spdn, handles 4 buckets: - //-------------------------------------------------------------- - - // A(:,i) is dense and B(:,j) is very sparse (< 256 entries) - case GB_BUCKET_DNVS : - // A(:,i) is very sparse (< 256 entries) and B(:,j) is dense - case GB_BUCKET_VSDN : - sz = 64 ; - Opname = "phase3_spdn" ; - blocksz = 32; - gridsz = ( Cnz -1 + blocksz)/blocksz; - break ; - - // A(:,i) is dense and B(:,j) is sparse (>= 256 entries) - case GB_BUCKET_DNSP : - // A(:,i) is sparse (>= 256 entries) and B(:,j) is dense - case GB_BUCKET_SPDN : - sz = 256 ; - Opname = "phase3_spdn" ; - blocksz = 32; - gridsz = ( Cnz -1 + blocksz)/blocksz; - break ; - - //-------------------------------------------------------------- - // CUDA kernel: vssp, handles 1 bucket, uses binary search: - //-------------------------------------------------------------- - - // A(:,i) is very sparse compared to B(:,j), or visa versa - case GB_BUCKET_VSSP : - Opname = "phase3_vssp" ; - blocksz = 32; - gridsz = ( Cnz -1 + blocksz)/blocksz; - break ; - - //-------------------------------------------------------------- - // CUDA kernel: vsvs, handles 4 buckets: - //-------------------------------------------------------------- - - // let len = nnz (A (:,i) + nnz (B (:,j)), then: - - case GB_BUCKET_VSVS_256 : sz += 256-64 ; - case GB_BUCKET_VSVS_64 : sz += 64-16 ; - case GB_BUCKET_VSVS_16 : sz += 16-4 ; - case 
GB_BUCKET_VSVS_4 : sz += 4 ; - Opname = "phase3_vsvs" ; - blocksz = 1024; - gridsz = GB_IMIN( 1024*number_of_sms, ( Cnz + blocksz -1 )/blocksz); - gridsz = ( Cnz + blocksz -1 )/blocksz; - break ; - - //-------------------------------------------------------------- - // CUDA kernel: mp, use the merge-path method: - //-------------------------------------------------------------- - - case GB_BUCKET_MERGEPATH : - Opname = "phase3_mp" ; - blocksz = 32; - gridsz = ( Cnz -1 + blocksz)/blocksz; - break ; - - case GB_BUCKET_WARP_IX : sz = 32 ; - Opname = "phase3_warpix" ; - blocksz = 32; - gridsz = GB_IMIN( (mnvec+15)/16, 256*number_of_sms); - break ; - - default: - break ; - } - - phase3_program << - R"(phase3_program - #include ")" << mysemiring.filename << R"(" - #include "GB_jit_AxB_dot3_)" << Opname << R"(.cu" - )" ; - - dim3 grid(gridsz); - dim3 block(blocksz); - - std::string kernel_name = "AxB_dot3_" ; - - std::cout<< "Kernel name =" <name, - A->type->name, - B->type->name, - semiring->multiply->xtype->name, - semiring->multiply->ytype->name, - semiring->multiply->ztype->name }) - .configure(grid, block) //if commented, use implicit 1D configure in launch - .launch( - start, // input/output: - end, // global bucket cumsum, of size NBUCKETS+1 - Bucket, // global buckets, of size cnz (== mnz) - C, // final output matrix - // inputs, not modified: - M, // Mi used for column index - A, // A matrix - B, // B matrix - sz // only used for sparse-sparse cases - ); - - cudaDeviceSynchronize(); - } - GBURBLE ("(GPU phase3 done) ") ; - - //---------------------------------------------------------------------- - // reduce C to a scalar, just for testing: - //---------------------------------------------------------------------- - - std::stringstream reduce_program ; - reduce_program << - R"(reduce_program - #include ")" << mysemiring.filename << R"(" - #include "reduceNonZombiesWarp.cu" - )" ; - - std::string reduce_kernel_name = "reduceNonZombiesWarp"; - #define red_blocksz 
1024 - - int num_reduce_blocks = GB_IMIN( 32*number_of_sms, (cnz + red_blocksz -1)/ red_blocksz ) ; - dim3 red_grid( num_reduce_blocks ) ; - dim3 red_block( red_blocksz ) ; - - int32_t *block_sum; - cudaMallocManaged ((void**) &block_sum, (num_reduce_blocks)*sizeof(int32_t)) ; - - GBURBLE ("(GPU reduce launch nblocks,blocksize= %d,%d )\n", num_reduce_blocks, red_blocksz) ; - jit::launcher( reduce_kernel_name + "_" + mysemiring.filename, - reduce_program.str(), - header_names, - jit::compiler_flags, - dummy_callback) - .set_kernel_inst( reduce_kernel_name , { ctype->name }) - .configure(red_grid, red_block) //if commented, use implicit 1D configure in launch - .launch( - C->i, // index vector, only sum up values >= 0 - C->x, // input pointer to vector to reduce, with zombies - block_sum, // Block sums on return - (unsigned int)cnz // length of vector to reduce to scalar - - ); - - cudaDeviceSynchronize(); - - int32_t num_triangles = 0; - for (int i = 0; i< num_reduce_blocks; i++){ - //printf("block%d num_triangles = %d\n", i, block_sum[i] ); - num_triangles += block_sum[i] ; - } - printf("num_triangles = %d\n", num_triangles ); - - if (block_sum != NULL) cudaFree( block_sum ); block_sum = NULL ; - - GB_FREE_WORKSPACE ; - return GrB_SUCCESS; -} - diff --git a/GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cu b/GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cpp similarity index 75% rename from GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cu rename to GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cpp index 11bd922439..6f33f6113b 100644 --- a/GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cu +++ b/GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cpp @@ -5,6 +5,7 @@ extern "C" #include "GB_mxm.h" } #include "GB_cuda.h" +#include bool GB_AxB_dot3_cuda_branch ( @@ -17,6 +18,9 @@ bool GB_AxB_dot3_cuda_branch GB_Context Context ) { + + printf ("HERE IN cuda branch, mask_struct is %d\n", Mask_struct) ; + // very rough estimate of the work to do double adeg = ((double) GB_nnz (A)) / ((double) GB_IMAX (1, A->nvec)) ; double 
bdeg = ((double) GB_nnz (B)) / ((double) GB_IMAX (1, B->nvec)) ; @@ -37,7 +41,7 @@ bool GB_AxB_dot3_cuda_branch GBURBLE (" work:%g GPUs:%d ", work, ngpus_to_use) ; if (ngpus_to_use > 0 // FIXME: FUTURE: user-defined types and operators - && (semiring->header_size == 0) // semiring is built-in +// && (semiring->header_size == 0) // semiring is built-in && (A->type->code != GB_UDT_code) && (B->type->code != GB_UDT_code) // FIXME: M could be hypersparse. we should handle this @@ -48,10 +52,18 @@ bool GB_AxB_dot3_cuda_branch && !GB_IS_BITMAP (A) && !GB_IS_BITMAP (B) && !GB_IS_FULL (A) && !GB_IS_FULL (B)) { + printf("Using CUDA Path.\n"); return true; } else - { + { + std::cout << "Not using cuda path. M_is_hypersparse: " << GB_IS_HYPERSPARSE(M) << + ", A->iso: " << A->iso << ", B->iso: " << B->iso << ", A_BITMAP: " << GB_IS_BITMAP(A) << + ", B_BITMAP: " << GB_IS_BITMAP(B) << ", GB_IS_FULL(A): " << GB_IS_FULL(A) + << ", GB_IS_FULL(B): " << GB_IS_FULL(B) << ", semiring header size: " << semiring->header_size << + + std::endl; + return false; } diff --git a/GraphBLAS/CUDA/GB_cuda.h b/GraphBLAS/CUDA/GB_cuda.h index a577176433..aae498729d 100644 --- a/GraphBLAS/CUDA/GB_cuda.h +++ b/GraphBLAS/CUDA/GB_cuda.h @@ -18,8 +18,8 @@ extern "C" } // Finally, include the CUDA definitions -#include "cuda.h" #include "cuda_runtime.h" +#include "cuda.h" #include "jitify.hpp" #include "GB_cuda_semiring_factory.hpp" diff --git a/GraphBLAS/CUDA/GB_cuda_atomics.cuh b/GraphBLAS/CUDA/GB_cuda_atomics.cuh new file mode 100644 index 0000000000..86608e1db6 --- /dev/null +++ b/GraphBLAS/CUDA/GB_cuda_atomics.cuh @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * Specializations for different atomic operations on different types + */ + +#pragma once + +template +__device__ void atomic_add(T* ptr, T val); + +template +__device__ void atomic_max(T* ptr, T val); + +template +__device__ void atomic_min(T* ptr, T val); + +template +__device__ void atomic_sub(T* ptr, T val); + +template<> __device__ __inline__ void atomic_add(int* ptr, int val) { atomicAdd(ptr, val); } +template<> __device__ __inline__ void atomic_add(int64_t* ptr, int64_t val) { atomicAdd((unsigned long long*)ptr, (unsigned long long)val); } +template<> __device__ __inline__ void atomic_add(float* ptr, float val) { atomicAdd(ptr, val); } +template<> __device__ __inline__ void atomic_add(double* ptr, double val) { atomicAdd(ptr, val); } + +template<> __device__ __inline__ void atomic_max(int* ptr, int val) { atomicMax(ptr, val); } +template<> __device__ __inline__ void atomic_max(int64_t* ptr, int64_t val) { atomicMax((unsigned long long*)ptr, (unsigned long long)val); } + +template<> __device__ __inline__ void atomic_min(int* ptr, int val) { atomicMin(ptr, val); } +template<> __device__ __inline__ void atomic_min(int64_t* ptr, int64_t val) { atomicMin((unsigned long long*)ptr, (unsigned long long)val); } + +template<> __device__ __inline__ void atomic_sub(int* ptr, int val) { atomicSub(ptr, val); } diff --git a/GraphBLAS/CUDA/GB_cuda_error.h b/GraphBLAS/CUDA/GB_cuda_error.h new file mode 100644 index 0000000000..719c519cde --- /dev/null +++ b/GraphBLAS/CUDA/GB_cuda_error.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef GB_CUDA_ERROR_H +#define GB_CUDA_ERROR_H + +#include + +static const char *_cudaGetErrorEnum(cudaError_t error) { + return cudaGetErrorName(error); +} + +template +void check(T result, char const *const func, const char *const file, + int const line) { + if (result) { + fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line, + static_cast(result), _cudaGetErrorEnum(result), func); + exit(EXIT_FAILURE); + } +} + +#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) + +// This will output the proper error string when calling cudaGetLastError +#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__) + +inline void __getLastCudaError(const char *errorMessage, const char *file, + const int line) { + cudaError_t err = cudaGetLastError(); + + if (cudaSuccess != err) { + fprintf(stderr, + "%s(%i) : getLastCudaError() CUDA error :" + " %s : (%d) %s.\n", + file, line, errorMessage, static_cast(err), + cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +// This will only print the proper error string when calling cudaGetLastError +// but not exit program incase error detected. 
+#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__) + +inline void __printLastCudaError(const char *errorMessage, const char *file, + const int line) { + cudaError_t err = cudaGetLastError(); + + if (cudaSuccess != err) { + fprintf(stderr, + "%s(%i) : getLastCudaError() CUDA error :" + " %s : (%d) %s.\n", + file, line, errorMessage, static_cast(err), + cudaGetErrorString(err)); + } +} +#define CHECK_CUDA(call) checkCudaErrors( call ) + +#endif \ No newline at end of file diff --git a/GraphBLAS/CUDA/GB_cuda_semiring_factory.hpp b/GraphBLAS/CUDA/GB_cuda_semiring_factory.hpp index ced1c800a3..2bf5202810 100644 --- a/GraphBLAS/CUDA/GB_cuda_semiring_factory.hpp +++ b/GraphBLAS/CUDA/GB_cuda_semiring_factory.hpp @@ -27,6 +27,7 @@ class GB_cuda_semiring_factory: public jit::File_Desc { public: uint64_t sr_code; + bool mask_struct; // file ptr FILE *fp; @@ -66,7 +67,9 @@ class GB_cuda_semiring_factory: public jit::File_Desc { int B_sparsity // sparsity structure of B ) { - std::cout<<" calling stringify semiring: " << semiring << std::endl; + std::cout<<" calling stringify semiring: " << std::endl; + GxB_Semiring_fprint (semiring, "stringfiy the smiering", GxB_COMPLETE, stdout) ; + std::cout<<" Mask_struct: " << Mask_struct << std::endl; uint64_t scode; GB_enumify_semiring ( // output: @@ -90,6 +93,7 @@ class GB_cuda_semiring_factory: public jit::File_Desc { std::cout << "done stringify semiring" << std::endl; this->sr_code = scode; + mask_struct = Mask_struct; std::stringstream ss; ss << "GB_semiring_" << this->sr_code << ".h"; diff --git a/GraphBLAS/CUDA/GB_cuda_type_wrap.hpp b/GraphBLAS/CUDA/GB_cuda_type_wrap.hpp new file mode 100644 index 0000000000..b571dceb55 --- /dev/null +++ b/GraphBLAS/CUDA/GB_cuda_type_wrap.hpp @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2019,2020 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#ifndef GB_CONV_TYPE_H +#define GB_CONV_TYPE_H + +extern "C" { +#include "GB.h" +}; +#include +#include +#include +#include +#include +#include + +/**---------------------------------------------------------------------------* + * @file type_convert.hpp + * @brief Defines the mapping between concrete C++ types and Grb types. + *---------------------------------------------------------------------------**/ +namespace cuda::jit { + +template +GrB_Type to_grb_type(); + +template<> inline GrB_Type to_grb_type() { return GrB_INT8; } +template<> inline GrB_Type to_grb_type() { return GrB_INT16; } +template<> inline GrB_Type to_grb_type() { return GrB_INT32; } +template<> inline GrB_Type to_grb_type() { return GrB_INT64; } +template<> inline GrB_Type to_grb_type() { return GrB_UINT8; } +template<> inline GrB_Type to_grb_type() { return GrB_UINT16; } +template<> inline GrB_Type to_grb_type() { return GrB_UINT32; } +template<> inline GrB_Type to_grb_type() { return GrB_UINT64; } +template<> inline GrB_Type to_grb_type() { return GrB_FP32; } +template<> inline GrB_Type to_grb_type() { return GrB_FP64; } +template<> inline GrB_Type to_grb_type() { return GrB_BOOL; } + + +template +void set_element(GrB_Matrix A, T x, int64_t i, int64_t j); + +template<> inline void set_element(GrB_Matrix A, int8_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_INT8(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, int16_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_INT16(A, x, i, j); } +template<> inline void set_element(GrB_Matrix 
A, int32_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_INT32(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, int64_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_INT64(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, uint8_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_UINT8(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, uint16_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_UINT16(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, uint32_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_UINT32(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, uint64_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_UINT64(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, float x, int64_t i, int64_t j) { GrB_Matrix_setElement_FP32(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, double x, int64_t i, int64_t j) { GrB_Matrix_setElement_FP64(A, x, i, j); } +template<> inline void set_element(GrB_Matrix A, bool x, int64_t i, int64_t j) { GrB_Matrix_setElement_BOOL(A, x, i, j); } + + +template +void vector_set_element(GrB_Vector A, T x, int64_t i); + +template<> inline void vector_set_element(GrB_Vector A, int8_t x, int64_t i) { GrB_Vector_setElement_INT8(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, int16_t x, int64_t i) { GrB_Vector_setElement_INT16(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, int32_t x, int64_t i) { GrB_Vector_setElement_INT32(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, int64_t x, int64_t i) { GrB_Vector_setElement_INT64(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, uint8_t x, int64_t i) { GrB_Vector_setElement_UINT8(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, uint16_t x, int64_t i) { GrB_Vector_setElement_UINT16(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, uint32_t x, int64_t i) { 
GrB_Vector_setElement_UINT32(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, uint64_t x, int64_t i) { GrB_Vector_setElement_UINT64(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, float x, int64_t i) { GrB_Vector_setElement_FP32(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, double x, int64_t i) { GrB_Vector_setElement_FP64(A, x, i); } +template<> inline void vector_set_element(GrB_Vector A, bool x, int64_t i) { GrB_Vector_setElement_BOOL(A, x, i); } + + + template + void scalar_set_element(GrB_Scalar A, T x); + + template<> inline void scalar_set_element(GrB_Scalar A, int8_t x) { GrB_Scalar_setElement_INT8(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, int16_t x) { GrB_Scalar_setElement_INT16(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, int32_t x) { GrB_Scalar_setElement_INT32(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, int64_t x) { GrB_Scalar_setElement_INT64(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, uint8_t x) { GrB_Scalar_setElement_UINT8(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, uint16_t x) { GrB_Scalar_setElement_UINT16(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, uint32_t x) { GrB_Scalar_setElement_UINT32(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, uint64_t x) { GrB_Scalar_setElement_UINT64(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, float x) { GrB_Scalar_setElement_FP32(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, double x) { GrB_Scalar_setElement_FP64(A, x); } + template<> inline void scalar_set_element(GrB_Scalar A, bool x) { GrB_Scalar_setElement_BOOL(A, x); } + + +template +GrB_Info vector_reduce(T *scalar, GrB_Vector A, GrB_Monoid op); + +template<> inline GrB_Info vector_reduce(int8_t *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_INT8(scalar, NULL, op, A, NULL); } 
+template<> inline GrB_Info vector_reduce(int16_t *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_INT16(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(int32_t *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_INT32(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(int64_t *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_INT64(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(uint8_t *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_UINT8(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(uint16_t *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_UINT16(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(uint32_t *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_UINT32(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(uint64_t *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_UINT64(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(float *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_FP32(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(double *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_FP64(scalar, NULL, op, A, NULL); } +template<> inline GrB_Info vector_reduce(bool *scalar, GrB_Vector A, GrB_Monoid op) { return GrB_Vector_reduce_BOOL(scalar, NULL, op, A, NULL); } + +template +GrB_Info get_element(GrB_Matrix A, T* x, int64_t i, int64_t j); +template<> inline GrB_Info get_element(GrB_Matrix A, int8_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_INT8(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, int16_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_INT16(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, int32_t *x, int64_t i, int64_t j) { return 
GrB_Matrix_extractElement_INT32(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, int64_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_INT64(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, uint8_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_UINT8(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, uint16_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_UINT16(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, uint32_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_UINT32(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, uint64_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_UINT64(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, float *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_FP32(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, double *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_FP64(x, A, i, j); } +template<> inline GrB_Info get_element(GrB_Matrix A, bool *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_BOOL(x, A, i, j); } + + + + + +template +class type_name { +public: + static const char *name; +}; + +#define DECLARE_TYPE_NAME(x) template<> inline const char *type_name::name = #x; +#define GET_TYPE_NAME(x) (type_name::name) + + DECLARE_TYPE_NAME(int); + DECLARE_TYPE_NAME(int&); + DECLARE_TYPE_NAME(int*); + DECLARE_TYPE_NAME(int8_t); + DECLARE_TYPE_NAME(int8_t&); + DECLARE_TYPE_NAME(int8_t*); + DECLARE_TYPE_NAME(unsigned char); + DECLARE_TYPE_NAME(unsigned char&); + DECLARE_TYPE_NAME(unsigned char*); + DECLARE_TYPE_NAME(unsigned int); + DECLARE_TYPE_NAME(unsigned int&); + DECLARE_TYPE_NAME(unsigned int*); + DECLARE_TYPE_NAME(unsigned int64_t); + DECLARE_TYPE_NAME(unsigned int64_t&); + DECLARE_TYPE_NAME(unsigned int64_t*); + DECLARE_TYPE_NAME(long); + DECLARE_TYPE_NAME(long&); + DECLARE_TYPE_NAME(long*); + 
DECLARE_TYPE_NAME(float); + DECLARE_TYPE_NAME(float&); + DECLARE_TYPE_NAME(float*); + DECLARE_TYPE_NAME(double); + DECLARE_TYPE_NAME(double&); + DECLARE_TYPE_NAME(double*); + DECLARE_TYPE_NAME(bool); + + + + inline const std::string grb_str_type(GB_Type_code grb_type_code) { + switch(grb_type_code) { + case GB_BOOL_code: + return "bool"; + case GB_INT8_code: + return "int8_t"; + case GB_UINT8_code: + return "uint8_t"; + case GB_INT16_code: + return "int16_t"; + case GB_UINT16_code: + return "uint16_t"; + case GB_INT32_code: + return "int32_t"; + case GB_UINT32_code: + return "uint32_t"; + case GB_INT64_code: + return "int64_t"; + case GB_UINT64_code: + return "uint64_t"; + case GB_FP32_code: + return "float"; + case GB_FP64_code: + return "double"; + default: + printf("Error: GrB_Type not supported.\n"); + exit(1); + } + } + + +} // namespace cuda::jit +#endif diff --git a/GraphBLAS/CUDA/GB_jit_cache.cu b/GraphBLAS/CUDA/GB_jit_cache.cu index 8068585183..a42488b698 100644 --- a/GraphBLAS/CUDA/GB_jit_cache.cu +++ b/GraphBLAS/CUDA/GB_jit_cache.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include "GB_jit_cache.h" #include "GraphBLAS.h" @@ -45,6 +46,12 @@ namespace jit { } } +// Get the directory in home to use for storing the cache + std::string get_user_graphblas_source_path() { + auto gb_home = std::getenv("GRAPHBLAS_SOURCE_PATH"); + if (gb_home != nullptr) return std::string(gb_home); + else return std::string(); + } // Default `GRAPHBLAS_CACHE_PATH` to `$HOME/.GraphBLAS`. 
@@ -81,8 +88,8 @@ std::string getCacheDir() { // `mkdir -p` the kernel cache path if it doesn't exist printf("cache is going to path %s\n", kernel_cache_path.c_str()); int status; - status = mkdir(kernel_cache_path.c_str(), 0777); - if (status != 0 ) return std::string(); + status = std::filesystem::create_directories(kernel_cache_path.c_str()); +// if (status != 0 ) return std::string(); //boost::filesystem::create_directories(kernel_cache_path); } return std::string(kernel_cache_path); diff --git a/GraphBLAS/CUDA/GB_jit_cache.h b/GraphBLAS/CUDA/GB_jit_cache.h index d523094c20..0b6f22862d 100644 --- a/GraphBLAS/CUDA/GB_jit_cache.h +++ b/GraphBLAS/CUDA/GB_jit_cache.h @@ -31,6 +31,7 @@ namespace jit { std::string get_user_home_cache_dir(); +std::string get_user_graphblas_source_path(); std::string getCacheDir(void); template diff --git a/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda.cpp b/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda.cpp new file mode 100644 index 0000000000..67178b3078 --- /dev/null +++ b/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda.cpp @@ -0,0 +1,44 @@ + +//------------------------------------------------------------------------------ +// GB_reduce_to_scalar_cuda.cu: reduce on the GPU with semiring +//------------------------------------------------------------------------------ + +// SPDX-License-Identifier: Apache-2.0 +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
+ +//------------------------------------------------------------------------------ + +extern "C" +{ +#include "GB_reduce.h" +} + +//#include "GB_cuda.h" +//#include "GB_jit_cache.h" +// +#include "jitFactory.hpp" +//#include "type_name.hpp" + +GrB_Info GB_reduce_to_scalar_cuda +( + GB_void *s, + const GrB_Monoid reduce, + const GrB_Matrix A, + GB_Context Context +) +{ + + //---------------------------------------------------------------------- + // reduce C to a scalar, just for testing: + //---------------------------------------------------------------------- + + int64_t nz = GB_nnz(A); + + GB_cuda_reduce( A, s, reduce); + + printf("num_triangles = %d\n", s[0] ); + + return GrB_SUCCESS ; +} + diff --git a/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda.cu b/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda.cu deleted file mode 100644 index c54ed735fc..0000000000 --- a/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda.cu +++ /dev/null @@ -1,89 +0,0 @@ - -//------------------------------------------------------------------------------ -// GB_reduce_to_scalar_cuda.cu: reduce on the GPU with semiring -//------------------------------------------------------------------------------ - -// SPDX-License-Identifier: Apache-2.0 -// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved. -// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
- -//------------------------------------------------------------------------------ - -#include "GB_cuda.h" - -//#include "templates/reduceWarp.cu.jit" -//#include "templates/reduceNonZombiesWarp.cu.jit" -#include "test/semiringFactory.hpp" - -#include "GB_jit_launcher.h" -#include "GB_callback.hpp" - - -const std::vector header_names ={}; - -GrB_Info GB_reduce_to_scalar_cuda -( - GB_void *s, - const GrB_Monoid reduce, - const GrB_Matrix A, - GB_Context Context -) -{ - - printf ("Hi I am %s :-)\n", __FILE__) ; - - // result = sum (Anz [0..anz-1]) using the GPU, - // with a kernel that has ntasks = grid.x and blocksize = blockDim.x - // nthreads = # of GPUs to use, but 1 for now - // We have a workspace W of size ntasks. - - thread_local static jitify::JitCache kernel_cache; - std::string reduce_kernel_name = "reduceNonZombiesWarp"; - - // stringified kernel specified above - - //TODO:Fix this -// jitify::Program program= kernel_cache.program( templates_reduceNonZombiesWarp_cu, 0, 0, -// file_callback_plus); - //{"--use_fast_math", "-I/usr/local/cuda/include"}); - - int nnz = GB_nnz( A ) ; - // GrB_Type ctype = reduce->op->ztype ; - - int blocksize = 1024 ; - int ntasks = ( nnz + blocksize -1) / blocksize ; - - int32_t *block_sum; - cudaMalloc ((void**) &block_sum, (ntasks)*sizeof(int32_t)) ; - - dim3 red_grid(ntasks); - dim3 red_block(blocksize); - -// GBURBLE ("(GPU reduce launch nblocks,blocksize= %d,%d )\n", ntasks, blocksize) ; -// jit::launcher( reduce_kernel_name + "_" + reduce->op->name, -// templates_reduceNonZombiesWarp_cu, -// header_names, -// compiler_flags, -// dummy_callback) -// .set_kernel_inst( reduce_kernel_name , { ctype->name }) -// .configure(red_grid, red_block) //if commented, use implicit 1D configure in launch -// .launch( -// A->i, // index vector, only sum up values >= 0 -// A->x, // input pointer to vector to reduce, with zombies -// block_sum, // Block sums on return -// (unsigned int)nnz // length of vector to reduce to scalar -// -// 
); -// -// cudaDeviceSynchronize(); - - - for (int i = 0 ; i < ntasks ; i++) - { - *s += (block_sum [i]) ; - } - - cudaFree( block_sum); - return (GrB_SUCCESS) ; -} - diff --git a/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda_branch.cu b/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda_branch.cpp similarity index 76% rename from GraphBLAS/CUDA/GB_reduce_to_scalar_cuda_branch.cu rename to GraphBLAS/CUDA/GB_reduce_to_scalar_cuda_branch.cpp index 438d93d120..6481f3aae6 100644 --- a/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda_branch.cu +++ b/GraphBLAS/CUDA/GB_reduce_to_scalar_cuda_branch.cpp @@ -13,13 +13,17 @@ bool GB_reduce_to_scalar_cuda_branch // work to do double work = GB_nnz (A) ; +// std::cout << "IS_BITMAP: " << GB_IS_BITMAP (A) << "IS_FULL: " << GB_IS_FULL(A) << std::endl; + int ngpus_to_use = GB_ngpus_to_use (work) ; GBURBLE (" work:%g gpus:%d ", work, ngpus_to_use) ; printf (" work:%g gpus:%d ", work, ngpus_to_use) ; if (ngpus_to_use > 0 && (reduce->header_size == 0) // semiring is built-in - && (A->type->code != GB_UDT_code)) - { + && (A->type->code != GB_UDT_code) + // FIXME: this is easy + && !A->iso + ) { return true; } else diff --git a/GraphBLAS/CUDA/GB_stringify_mask.c b/GraphBLAS/CUDA/GB_stringify_mask.c index 0b49c5f67e..30aa16de22 100644 --- a/GraphBLAS/CUDA/GB_stringify_mask.c +++ b/GraphBLAS/CUDA/GB_stringify_mask.c @@ -309,6 +309,8 @@ void GB_macrofy_mask // return enum to define mask macros break ; } + printf ("HERE is the Mask stuff:\n%s\n", f) ; + fprintf( fp, "%s\n", f ) ; } diff --git a/GraphBLAS/CUDA/GB_stringify_semiring.c b/GraphBLAS/CUDA/GB_stringify_semiring.c index 988743eb4f..950d8b668a 100644 --- a/GraphBLAS/CUDA/GB_stringify_semiring.c +++ b/GraphBLAS/CUDA/GB_stringify_semiring.c @@ -44,7 +44,7 @@ void GB_stringify_semiring // build a semiring (name and code) semiring, flipxy, ctype, mtype, atype, btype, Mask_struct, Mask_comp, C_sparsity, M_sparsity, A_sparsity, B_sparsity) ; - printf("done enumify semiring\n"); + printf("done enumify semiring: 
scode is %lu\n", scode); GB_macrofy_semiring ( fp, scode) ; @@ -78,7 +78,7 @@ void GB_enumify_semiring // enumerate a semiring //-------------------------------------------------------------------------- // get the semiring //-------------------------------------------------------------------------- - printf("inside enumify: %p\n", semiring); + printf("inside enumify: \n") ; GxB_print (semiring, 3) ; printf("Getting semiring add\n"); @@ -211,9 +211,11 @@ void GB_enumify_semiring // enumerate a semiring // enumify the mask //-------------------------------------------------------------------------- - printf("Invoking enumify_mask, mtype %p\n", mtype); + printf("Invoking enumify_mask, mtype: \n"); + GxB_print (mtype, 3) ; int mtype_code = (mtype == NULL) ? 0 : mtype->code ; // 0 to 14 int mask_ecode ; + printf("Mask_struct: %d, Mask_comp: %d\n", Mask_struct, Mask_comp); GB_enumify_mask (&mask_ecode, mtype_code, Mask_struct, Mask_comp) ; printf ("got mask_ecode: %d\n", mask_ecode) ; diff --git a/GraphBLAS/CUDA/jitFactory.hpp b/GraphBLAS/CUDA/jitFactory.hpp index f597d961d9..18d14caafe 100644 --- a/GraphBLAS/CUDA/jitFactory.hpp +++ b/GraphBLAS/CUDA/jitFactory.hpp @@ -33,38 +33,20 @@ and call kernels. 
*/ +#ifndef GB_JITFACTORY_H +#define GB_JITFACTORY_H + #pragma once +extern "C" { +#include "GraphBLAS.h" +}; #include "GB_jit_launcher.h" #include "GB_cuda_semiring_factory.hpp" - -// FIXME: Is this okay or will it bring in too much (GB.h is brought in transitively) -#include "GraphBLAS.h" -#include "GB_Semiring_new.c" -#include "GrB_Semiring_new.c" -#include "GB_Monoid_new.c" -#include "GrB_Monoid_new.c" #include "GB_cuda_buckets.h" - -#include "type_name.hpp" - -#undef JITIFY_PRINT_INSTANTIATION -#define JITIFY_PRINT_INSTANTIATION 1 -#undef JITIFY_PRINT_SOURCE -#define JITIFY_PRINT_SOURCE 1 -#undef JITIFY_PRINT_LOG -#define JITIFY_PRINT_LOG 1 -#undef JITIFY_PRINT_PTX -#define JITIFY_PRINT_PTX 1 -#undef JITIFY_PRINT_LINKER_LOG -#define JITIFY_PRINT_LINKER_LOG 1 -#undef JITIFY_PRINT_LAUNCH -#define JITIFY_PRINT_LAUNCH 1 - -#include "test/dataFactory.hpp" -#include "test/semiringFactory.hpp" -// #include "GB_cuda.h" - +#include "GB_cuda_type_wrap.hpp" +#include "GB_cuda_error.h" +#include "../rmm_wrap/rmm_wrap.h" #if __cplusplus >= 201103L @@ -78,14 +60,17 @@ * Kernel factory says "Here's the actual instance I want you to build with the given parameters" */ +//bool GB_cuda_reduce(int64_t *index, void *in_data, void *output, unsigned int N, GrB_Monoid op); + //Kernel jitifiers -template class reduceFactory ; +class reduceFactory ; template class dotFactory ; template class spdotFactory ; +inline std::istream* (*file_callback)(std::string, std::iostream&); //AxB_dot3_phase1 kernel launchers -template< typename T_C, typename T_M, typename T_A, typename T_B, int threads_per_block, int chunk_size> class phase1launchFactory ; +template class phase1launchFactory ; //AxB_dot3_phase3 kernel launchers @@ -93,7 +78,7 @@ template< typename T_C, typename T_M, typename T_A, typename T_B, typename T_xy, typename T_z> class launchFactory ; -const std::vector compiler_flags{ +static const std::vector compiler_flags{ "-std=c++14", "-G", "-remove-unused-globals", @@ -101,19 +86,24 @@ 
const std::vector compiler_flags{ "-D__CUDACC_RTC__", "-I.", "-I..", -// "-I../../Include", "-I../../Source", "-I../../Source/Template", - "-I../local_cub/block", "-I../templates", + + // Add includes relative to GRAPHBLAS_SOURCE_PATH variable + "-I" + jit::get_user_graphblas_source_path() + "/CUDA", + "-I" + jit::get_user_graphblas_source_path() + "/CUDA/templates", + "-I" + jit::get_user_graphblas_source_path() + "/Source", + "-I" + jit::get_user_graphblas_source_path() + "/Source/Template", "-I/usr/local/cuda/include", }; -const std::vector header_names ={}; +static const std::vector header_names ={}; -// FIXME: Need to be able to convert from GrB_Type->std::type to populate these templates -// this isn't going to be known at compile time. -template< typename T_C, typename T_M, typename T_A, typename T_B, int threads_per_block=32, int chunk_size = 128> +// FIXME: We probably want to remove this type template altogether and provide a +// macro/function that can convert from a GrB_Type instance to the name of a type +// that the jitifier will accept. +template class phase1launchFactory { std::string base_name = "GB_jit"; @@ -147,7 +137,6 @@ class phase1launchFactory // 128*number_of_sms (say 128*80 = 10,240 on a V100). 
// Defining dummy instance only so we can introspect type - T_M dumM; std::cout << "A TYpe: " << A->type << std::endl; std::cout << "B TYpe: " << B->type << std::endl; @@ -157,13 +146,15 @@ class phase1launchFactory jit::GBJitCache filecache = jit::GBJitCache::Instance() ; filecache.getFile (semiring_factory_) ; + auto sr_code = std::to_string(semiring_factory_.sr_code); + std::stringstream string_to_be_jitted ; - std::vector template_types = {GET_TYPE_NAME(dumM)}; + std::vector template_types = {M->type->name, sr_code}; std::string hashable_name = base_name + "_" + kernel_name; string_to_be_jitted << hashable_name << std::endl << R"(#include ")" << jit::get_user_home_cache_dir() << "/" << semiring_factory_.filename << R"(")" << std::endl << - R"(#include ")" << hashable_name << R"(.cu")" << std::endl; + R"(#include "templates/)" << hashable_name << R"(.cuh")" << std::endl; std::cout << string_to_be_jitted.str(); bool result = false; @@ -171,7 +162,7 @@ class phase1launchFactory dim3 grid(get_number_of_blocks(M)); dim3 block(get_threads_per_block()); - jit::launcher( hashable_name, + jit::launcher( hashable_name + "_" + M->type->name + "_" + sr_code, string_to_be_jitted.str(), header_names, compiler_flags, @@ -187,7 +178,7 @@ class phase1launchFactory } }; -template< typename T_C, int threads_per_block = 32, int chunk_size = 128> +template class phase2launchFactory { @@ -209,9 +200,7 @@ class phase2launchFactory } bool jitGridBlockLaunch(// parameters to AxB_phase2: - int64_t *nanobuckets, int64_t *blockBucket, - int64_t *bucketp, int64_t *bucket, int64_t *offset, - GrB_Matrix M) { + int64_t *blockBucket, int64_t *offset, GrB_Matrix M) { bool result = false; @@ -221,7 +210,7 @@ class phase2launchFactory std::string hashable_name = base_name + "_" + kernel_name; std::stringstream string_to_be_jitted ; string_to_be_jitted << - hashable_name << std::endl << R"(#include ")" << hashable_name << R"(.cu")" << std::endl; + hashable_name << std::endl << R"(#include ")" << 
hashable_name << R"(.cuh")" << std::endl; // dump it: std::cout << string_to_be_jitted.str(); @@ -229,13 +218,13 @@ class phase2launchFactory const int64_t mnz = GB_nnz (M) ; jit::launcher( hashable_name, string_to_be_jitted.str(), - header_names, + header_names, compiler_flags, file_callback) .set_kernel_inst( kernel_name, {}) .configure(grid, block) // parameters to AxB_phase2: - .launch( nanobuckets, blockBucket, bucketp, bucket, offset, mnz); + .launch( blockBucket, offset, get_number_of_blocks(M)); checkCudaErrors( cudaDeviceSynchronize() ); result= true; @@ -245,14 +234,14 @@ class phase2launchFactory }; -template< typename T_C, int threads_per_block = 32, int chunk_size = 128> -class phase2endlaunchFactory +template< int threads_per_block = 32, int chunk_size = 128> +class phase2endlaunchFactory { std::string base_name = "GB_jit"; std::string kernel_name = "AxB_phase2end"; -public: +public: int get_threads_per_block() { return threads_per_block; @@ -271,10 +260,8 @@ class phase2endlaunchFactory int64_t *bucketp, int64_t *bucket, int64_t *offset, GrB_Matrix C, GrB_Matrix M) { - - bool result = false; - T_C dumC; + bool result = false; dim3 grid(get_number_of_blocks(M)); dim3 block(get_threads_per_block()); @@ -282,14 +269,14 @@ class phase2endlaunchFactory std::string hashable_name = base_name + "_" + kernel_name; std::stringstream string_to_be_jitted ; string_to_be_jitted << - hashable_name << std::endl << R"(#include ")" << hashable_name << R"(.cu")" << std::endl; + hashable_name << std::endl << R"(#include ")" << hashable_name << R"(.cuh")" << std::endl; // dump it: std::cout << string_to_be_jitted.str(); jit::launcher( hashable_name, string_to_be_jitted.str(), - header_names, + header_names, compiler_flags, file_callback) .set_kernel_inst( kernel_name , {}) @@ -304,7 +291,6 @@ class phase2endlaunchFactory }; -template< typename T_C, typename T_M, typename T_A, typename T_B, typename T_XY, typename T_Z> class phase3launchFactory { std::string base_name = 
"GB_jit"; @@ -313,7 +299,6 @@ class phase3launchFactory GB_cuda_semiring_factory &semiring_factory_; GB_bucket_code bucket_code_; - GB_callback callback_generator; public: @@ -328,21 +313,13 @@ class phase3launchFactory bool jitGridBlockLaunch(int64_t start, int64_t end, int64_t *bucketp, int64_t *bucket, GrB_Matrix C, GrB_Matrix M, GrB_Matrix A, GrB_Matrix B) { - - bool result = false; - - T_C dumC; - T_M dumM; - T_A dumA; - T_B dumB; - T_XY dumXY; - T_Z dumZ; + bool result = false; //---------------------------------------------------------------------- // phase3: do the numerical work //---------------------------------------------------------------------- - + C->jumbled = true; C->nzombies = bucketp[1]; //set pre-zombie counts const int64_t Cnz = GB_nnz (C) ; const int64_t mnvec = M->nvec ; @@ -365,29 +342,22 @@ class phase3launchFactory string_to_be_jitted << hashable_name << std::endl << R"(#include ")" << jit::get_user_home_cache_dir() << "/" << semiring_factory_.filename << R"(")" << std::endl << - R"(#include ")" << hashable_name << R"(.cu")" << std::endl; - - std::cout << "String to be jitted: " << string_to_be_jitted.str() << std::endl; + R"(#include ")" << hashable_name << R"(.cuh")" << std::endl; dim3 grid(gridsz); dim3 block(blocksz); - std::cout<< "program name =" <type->name, + A->type->name, + B->type->name }) .configure(grid, block) //if commented, use implicit 1D configure in launch .launch( start, // input/output: @@ -416,6 +386,8 @@ class phase3launchFactory int number_of_sms = GB_Global_gpu_sm_get (0) ; std::string Opname; + + printf("LAUNCHING BUCKET CODE: %d\n", (int)bucket_code_); switch (bucket_code_) { @@ -480,12 +452,15 @@ class phase3launchFactory // let len = nnz (A (:,i) + nnz (B (:,j)), then: + printf("number_of_sms=%d\n", number_of_sms); case GB_BUCKET_VSVS_256 : sz += 256-64 ; case GB_BUCKET_VSVS_64 : sz += 64-16 ; case GB_BUCKET_VSVS_16 : sz += 16-4 ; case GB_BUCKET_VSVS_4 : sz += 4 ; Opname = "phase3_vsvs" ; - blocksz = 1024; + 
blocksz = 512; + + // FIXME: Is the first line not needed? gridsz = GB_IMIN( 1024*number_of_sms, ( Cnz + blocksz -1 )/blocksz); gridsz = ( Cnz + blocksz -1 )/blocksz; break ; @@ -514,6 +489,123 @@ class phase3launchFactory } }; +class reduceFactory +{ + std::string base_name = "GB_jit"; + std::string kernel_name = "reduceNonZombiesWarp"; + + int threads_per_block = 128; + +public: + + int get_threads_per_block() { + return threads_per_block; + } + + int get_number_of_blocks(unsigned int N) { + return (N + threads_per_block - 1)/threads_per_block; + } + + // Note: this does assume the erased types are compatible w/ the monoid's ztype + bool jitGridBlockLaunch(GrB_Matrix A, void* output, + GrB_Monoid op) + { + + // TODO: We probably want to "macrofy" the GrB_Monoid and define it in the `string_to_be_jitted` +// void GB_stringify_binop +// ( +// // input: +// FILE *fp, // File to write macros, assumed open already +// const char *macro_name, // name of macro to construct +// GB_Opcode opcode, // opcode of GraphBLAS operator to convert into a macro +// GB_Type_code xcode, // op->xtype->code of the operator +// bool for_semiring, // if true: op is a multiplier in a semiring +// bool flipxy // if true, use mult(y,x) else mult(x,y) +// ) + + GrB_Scalar temp_scalar; + GrB_Scalar_new(&temp_scalar, op->op->ztype); + + cuda::jit::scalar_set_element(temp_scalar, 0); + + GrB_Scalar_wait(temp_scalar, GrB_MATERIALIZE); + + std::string hashable_name = base_name + "_" + kernel_name; + std::stringstream string_to_be_jitted ; + string_to_be_jitted << + hashable_name << std::endl << R"(#include ")" << + hashable_name << R"(.cuh")" << std::endl; + + bool is_sparse = GB_IS_SPARSE(A); + int64_t N = is_sparse ? 
GB_nnz(A) : GB_NCOLS(A) * GB_NROWS(A); + + int blocksz = get_threads_per_block(); + int gridsz = get_number_of_blocks(N); + dim3 grid(gridsz); + dim3 block(blocksz); + + jit::launcher(hashable_name, + string_to_be_jitted.str(), + header_names, + compiler_flags, + file_callback) + .set_kernel_inst( kernel_name , { A->type->name, op->op->ztype->name, "true" }) + .configure(grid, block) + + // FIXME: GB_ADD is hardcoded into kernel for now + .launch( A, temp_scalar, N, is_sparse); + + + checkCudaErrors( cudaDeviceSynchronize() ); + + memcpy(output, temp_scalar->x, op->op->ztype->size); + + rmm_wrap_free(temp_scalar); + return true; + } +}; + +template< int threads_per_block=32, int chunk_size = 128> +inline bool GB_cuda_mxm_phase1(GB_cuda_semiring_factory &semiring_factory, int64_t *nanobuckets, int64_t *blockBucket, + GrB_Matrix C, GrB_Matrix M, GrB_Matrix A, GrB_Matrix B) { + phase1launchFactory lf(semiring_factory); + return lf.jitGridBlockLaunch(nanobuckets, blockBucket, C, M, A, B); +} + + +template +bool GB_cuda_mxm_phase2(int64_t *nanobuckets, int64_t *blockBucket, + int64_t *bucketp, int64_t *bucket, int64_t *offset, + GrB_Matrix M) { + + phase2launchFactory lf; + return lf.jitGridBlockLaunch(nanobuckets, blockBucket, bucketp, bucket, offset, M); +} + +template +inline bool GB_cuda_mxm_phase2end(int64_t *nanobuckets, int64_t *blockBucket, + int64_t *bucketp, int64_t *bucket, int64_t *offset, + GrB_Matrix C, GrB_Matrix M) { + phase2endlaunchFactory lf; + return lf.jitGridBlockLaunch(nanobuckets, blockBucket, bucketp, bucket, offset, C, M); +} + + + +inline bool GB_cuda_mxm_phase3(GB_cuda_semiring_factory &mysemiringfactory, GB_bucket_code bucket_code, + int64_t start, int64_t end, int64_t *bucketp, int64_t *bucket, + GrB_Matrix C, GrB_Matrix M, GrB_Matrix A, GrB_Matrix B) { + phase3launchFactory lf(mysemiringfactory, bucket_code); + return lf.jitGridBlockLaunch(start, end, bucketp, bucket, C, M, A, B); +} + + +inline bool GB_cuda_reduce(GrB_Matrix A, void 
*output, GrB_Monoid op) { + reduceFactory rf; + return rf.jitGridBlockLaunch(A, output, op); +} + + //template //class spdotFactory //{ @@ -612,56 +704,6 @@ class phase3launchFactory // //}; // -//template -//class reduceFactory -//{ -// std::string base_name = "GBjit_reduce_"; -// -//public: -// reduceFactory() { -// } -// -// bool jitGridBlockLaunch(int gridsz, int blocksz, -// T* indata, T* output, unsigned int N, -// std::string OpName) -// { -// dim3 grid(gridsz); -// dim3 block(blocksz); -// bool result = false; -// T dummy; -// -// std::cout<<" indata type ="<< GET_TYPE_NAME(dummy)<= 201103L - -/** - * This file is responsible for picking all the parameters and what kernel variaiton we will use for a given instance - * - data types - * - semiring types - * - binary ops - * - monoids - * - * Kernel factory says "Here's the actual instance I want you to build with the given parameters" - */ - -//Kernel jitifiers -template class reduceFactory ; - - -//AxB_dot3_phase1 kernel launchers -template< typename T_C, typename T_M, typename T_A, typename T_B> class phase1launchFactory ; - -//AxB_dot3_phase3 kernel launchers - -template< typename T_C, typename T_M, - typename T_A, typename T_B, typename T_xy, typename T_z> class launchFactory ; - - -const std::vector compiler_flags{ - "-std=c++14", - "-remove-unused-globals", - "-w", - "-D__CUDACC_RTC__", - "-I.", - "-I..", -// "-I../../Include", - "-I../../Source", - "-I../../Source/Template", - "-I../local_cub/block", - "-I../templates", - "-I/usr/local/cuda/include" -}; - -const std::vector header_names ={}; - -template -class reduceFactory -{ - std::string base_name = "GB_jit_reduce"; - - -public: - reduceFactory() { - } - - bool jitGridBlockLaunch(int gridsz, int blocksz, - T* indata, T* output, unsigned int N, - std::string OpName) - { - dim3 grid(gridsz); - dim3 block(blocksz); - bool result = false; - T dummy; - - std::cout<<" indata type ="<< GET_TYPE_NAME(dummy)< #include -#include #include "matrix.h" +#include 
+ // Using tile size fixed at compile time, we don't need shared memory #define tile_sz 32 @@ -81,7 +83,7 @@ T block_ReduceSum(thread_block g, T val, T Ident) } -template< typename T_C, typename T_A, typename T_B, typename T_X, typename T_Y, typename T_Z> +template< typename T_C, typename T_A, typename T_B> __global__ void AxB_dot3_phase3_dndn ( int64_t start, @@ -94,14 +96,15 @@ __global__ void AxB_dot3_phase3_dndn int sz ) { - - T_A *Ax = (T_A*)A->x; - T_B *Bx = (T_B*)B->x; - T_C *Cx = (T_C*)C->x; - int64_t *Mi = M->i; - int64_t *Ci = C->i; - int64_t *Ap = A->p; - int64_t *Bp = B->p; + const T_A *__restrict__ Ax = (T_A *)A->x ; + const T_B *__restrict__ Bx = (T_B *)B->x ; + T_C *__restrict__ Cx = (T_C *)C->x ; + int64_t *__restrict__ Ci = C->i ; + const int64_t *__restrict__ Mi = M->i ; + const int64_t *__restrict__ Ai = A->i ; + const int64_t *__restrict__ Bi = B->i ; + const int64_t *__restrict__ Ap = A->p ; + const int64_t *__restrict__ Bp = B->p ; // zombie count int zc = 0; @@ -124,33 +127,31 @@ __global__ void AxB_dot3_phase3_dndn int64_t xend = Ap[i+1]; nnzA = xend - pA; - int64_t pB = Bp[j]; - int64_t yend = Bp[j+1]; + int64_t pB = Bp[j]; + int64_t yend = Bp[j+1]; nnzB = yend - pB; - /* if (threadIdx.x == 0 ){ - printf(" i,j = %d,%d nnz= %d xstart,end = %d,%d ystart,end = %d,%d\n", - (int)i,(int)j, (int)nnzA, (int)xstart,(int)xend, (int)ystart, (int)yend); + printf("tid=%d, i,j = %d,%d nnzA= %d, nnzB=%d\n", + threadIdx.x, (int)i,(int)j, (int)nnzA, (int)nnzB); } - __syncthreads(); - */ + __syncthreads(); // convert global data pointer to the local pointer of this block T_A aki; // *xdata = &Ax[xstart]; T_B bkj; // *ydata = &Bx[ystart]; - T_Z cij; + T_C cij; - GB_GETA ( aki=(T_Z)Ax[pA+threadIdx.x] ) ; // aki = A(0,i) - GB_GETB ( bkj=(T_Z)Bx[pB+threadIdx.x] ) ; // bkj = B(0,j) + GB_GETA ( aki=(T_C)Ax[pA+threadIdx.x] ) ; // aki = A(0,i) + GB_GETB ( bkj=(T_C)Bx[pB+threadIdx.x] ) ; // bkj = B(0,j) GB_C_MULT ( cij, aki, bkj ) ; // cij = aki * bkj for ( int tid = 
threadIdx.x + s; tid < nnzA; tid+= s) { // cij += A(k,i) * B(k,j) // GB_DOT_TERMINAL ( cij ) ; // break if cij == terminal - GB_GETA ( aki=(T_Z)Ax[pA+tid] ) ; // aki = A(k,i) - GB_GETB ( bkj=(T_Z)Bx[pB+tid] ) ; // bkj = B(k,j) + GB_GETA ( aki=(T_C)Ax[pA+tid] ) ; // aki = A(k,i) + GB_GETB ( bkj=(T_C)Bx[pB+tid] ) ; // bkj = B(k,j) GB_MULTADD ( cij, aki, bkj ) ; // cij += aki * bkj } @@ -159,7 +160,7 @@ __global__ void AxB_dot3_phase3_dndn // reduce per-thread sums to a single scalar //-------------------------------------------------------------------------- thread_block_tile<32> tile = tiled_partition<32>( this_thread_block() ); - cij = warp_ReduceSum ( tile, cij); + cij = warp_ReduceSum ( tile, cij); // write result for this block to global mem if (threadIdx.x == 0) diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_mp.cu b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_mp.cuh similarity index 77% rename from GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_mp.cu rename to GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_mp.cuh index 9609d581c9..3230eecdd2 100644 --- a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_mp.cu +++ b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_mp.cuh @@ -9,7 +9,7 @@ // ie. we want to produce C = A'*B in the sense of the given semi-ring. // This version uses a merge-path algorithm, when the sizes nnzA and nnzB are -// relatively close in size, neither is very spare nor dense, for any size of N. +// relatively close in size, neither is very sparse nor dense, for any size of N. // Handles arbitrary sparsity patterns with guaranteed load balance. 
// Both the grid and block are 1D, so blockDim.x is the # threads in a @@ -30,6 +30,9 @@ // matrix *M <- mask matrix // matrix *A <- input matrix A // matrix *B <- input matrix B + +#pragma once + #include #include #include @@ -69,7 +72,7 @@ T reduce_plus(thread_block_tile g, T val) #define intersects_per_thread 8 -template< typename T_C, typename T_A, typename T_B, typename T_X, typename T_Y, typename T_Z> +template< typename T_C, typename T_A, typename T_B> __global__ void AxB_dot3_phase3_mp ( int64_t start, @@ -83,6 +86,7 @@ __global__ void AxB_dot3_phase3_mp ) { + C->jumbled = true; T_A *Ax = (T_A*)A->x; T_B *Bx = (T_B*)B->x; T_C *Cx = (T_C*)C->x; @@ -93,7 +97,6 @@ __global__ void AxB_dot3_phase3_mp int64_t *Ap = A->p; int64_t *Bp = B->p; - // zombie count int zc = 0; @@ -125,14 +128,17 @@ __global__ void AxB_dot3_phase3_mp int64_t i = Mi[pair_id]; int64_t j = Ci[pair_id] >> 4; - int64_t xstart = Ap[j]; - int64_t xend = Ap[j+1]; + int64_t xstart = Ap[i]; + int64_t xend = Ap[i+1]; nnzA = xend - xstart; - int64_t ystart = Bp[i]; - int64_t yend = Bp[i+1]; + int64_t ystart = Bp[j]; + int64_t yend = Bp[j+1]; nnzB = yend - ystart; +// if(threadIdx.x == 0 && j == 139 && i == 945) +// printf("blk%d tid=%d, nnzA=%d, nnzB=%d\n", blockIdx.x, tid_global, nnzA, nnzB); +// n_intersect = GB_IMIN( xend -xstart, yend -ystart); /* if (threadIdx.x ==0 ) { @@ -197,7 +203,7 @@ __global__ void AxB_dot3_phase3_mp T_A aki; T_B bkj; - T_Z cij = GB_IDENTITY ; + T_C cij = GB_IDENTITY ; // TODO PLUS_PAIR_INT64, FP32, FP64: no need for cij_exists. 
// just check if cij > 0 @@ -208,34 +214,42 @@ __global__ void AxB_dot3_phase3_mp //merge-path dot product int k = tx_start; int l = ty_start; - while ( k < tx_end && l < ty_end ) + +// if(threadIdx.x == 0 && j == 139) { +// printf("blk%d, thd%d k=%d, l=%d, tx_start=%d, ty_start=%d, tx_end=%d, ty_end=%d\n", blockIdx.x, tid_global, k, l, tx_start, ty_start, tx_end, ty_end); +// } + + while ( k < tx_end && l < ty_end && nnzA != 0 && nnzB != 0) { - if (Ai [k] == Bi [l]) - { - GB_GETA ( aki=(T_Z)Ax[k] ) ; - GB_GETB ( bkj=(T_Z)Bx[l] ) ; - if (cij_exists) - { - T_Z t = GB_MULT( (T_Z)aki, (T_Z)bkj ); - GB_ADD_F (cij, t ) ; - //printf(" thd%d ix at %lld cij += %d * %d \n", tid_global, Ai[k], aki, bkj); - } - else - { - cij_exists = 1 ; - cij = GB_MULT ( (T_Z)aki, (T_Z)bkj ) ; - //printf(" thd%d ix at %lld cij = %d * %d \n", tid_global, Ai[k], Ax[k], Bx[l]); - } - // TODO check terminal condition - k+= 1; - l+= 1; - //printf(" block%u work value = %d, exists = %d\n", b, cij, cij_exists); - } - else - { + if (Ai [k] == Bi [l]) + { + GB_GETA ( aki=(T_C)Ax[k] ) ; + GB_GETB ( bkj=(T_C)Bx[l] ) ; + if (cij_exists) + { + T_C t = GB_MULT( (T_C)aki, (T_C)bkj ); + GB_ADD_F (cij, t ) ; +// if(j == 139 && i == 945) +// printf("blk%d thd%d ix at %lld %lld cij += %d * %d \n", blockIdx.x, tid_global, Ai[k], Bi[l], aki, bkj); + } + else + { + cij_exists = 1 ; + cij = GB_MULT ( (T_C)aki, (T_C)bkj ) ; +// if(j == 139 && i == 945) +// printf("blk%d thd%d ix at %lld %lld cij = %d * %d, k=%d, l=%d i=%lld j=%lld \n", blockIdx.x, tid_global, Ai[k], Bi[l], Ax[k], Bx[l], k, l, i, j); + } + // TODO check terminal condition + k+= 1; + l+= 1; +// if(j == 139 && i == 945) +// printf(" block%u work value = %d, exists = %d\n", b, cij, cij_exists); + } + else + { k += ( Ai[k] < Bi[l] ) ; l += ( Ai[k] > Bi[l] ) ; - } + } } //tile.sync( ) ; @@ -256,13 +270,13 @@ __global__ void AxB_dot3_phase3_mp if (cij_exists) { - cij = GB_reduce_sum( tile, cij ); + cij = GB_reduce_sum( tile, cij ); } // else has_zombies 
= 1; - //__syncthreads(); + //__syncthreads(); //tile.sync( ); // write result for this block to global mem if (tid == 0) @@ -270,13 +284,18 @@ __global__ void AxB_dot3_phase3_mp //printf ("final %d : %d exists = %d\n", b, cij, cij_exists) ; if (cij_exists) { - //printf(" cij = %d\n", cij); +// +// if(j == 139) { +// printf("what's the deal here? %d, %ld\n", cij, i); +// } + + //printf(" cij = %d\n", cij); GB_PUTC ( Cx[pair_id]=(T_C)cij ) ; GB_PUTC ( Ci[pair_id]=i ) ; } else { - //printf(" dot %d is a zombie\n", pair_id); + printf(" dot %d is a zombie\n", pair_id); zc++; GB_PUTC ( Ci[pair_id]=GB_FLIP (i) ) ; } @@ -288,9 +307,9 @@ __global__ void AxB_dot3_phase3_mp if( tid ==0 && zc > 0) { - //printf("warp %d zombie count = %d\n", blockIdx.x, zc); +// printf("warp %d zombie count = %d, nzombies = %d\n", blockIdx.x, zc, C->nzombies); atomicAdd( (unsigned long long int*)&(C->nzombies), (unsigned long long int)zc); - //printf(" Czombie = %lld\n",C->nzombies); +// printf(" Czombie = %lld\n",C->nzombies); } //__syncthreads(); diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_spdn.cu b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_spdn.cuh similarity index 74% rename from GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_spdn.cu rename to GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_spdn.cuh index 560f52d5b9..dbafd38b90 100644 --- a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_spdn.cu +++ b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_spdn.cuh @@ -15,6 +15,8 @@ // matrix *B <- B matrix to multiply, dense in sparse format? 
// int sz <- size hint for smaller vector //****************************************************************************** +#pragma once + #include #include #include @@ -43,7 +45,7 @@ __device__ T reduce_sum(thread_block_tile g, T val) } -template< typename T_C, typename T_A, typename T_B, typename T_X, typename T_Y, typename T_Z> +template< typename T_C, typename T_A, typename T_B> __global__ void AxB_dot3_phase3_spdn ( int64_t start, @@ -56,17 +58,15 @@ __global__ void AxB_dot3_phase3_spdn int sz ) { - T_A *Ax = (T_A*)A->x; - T_B *Bx = (T_B*)B->x; - T_C *Cx = (T_C*)C->x; - int64_t *Ci = C->i; - int64_t *Mi = M->i; - int64_t *Ai = A->i; - int64_t *Bi = B->i; - int64_t *Ap = A->p; - int64_t *Bp = B->p; - - C->jumbled = true; + const T_A *__restrict__ Ax = (T_A *)A->x ; + const T_B *__restrict__ Bx = (T_B *)B->x ; + T_C *__restrict__ Cx = (T_C *)C->x ; + int64_t *__restrict__ Ci = C->i ; + const int64_t *__restrict__ Mi = M->i ; + const int64_t *__restrict__ Ai = A->i ; + const int64_t *__restrict__ Bi = B->i ; + const int64_t *__restrict__ Ap = A->p ; + const int64_t *__restrict__ Bp = B->p ; // typedef cub::BlockReduce BlockReduce; // __shared__ typename BlockReduce::TempStorage temp_storage; @@ -76,9 +76,9 @@ __global__ void AxB_dot3_phase3_spdn int nvec = end - start; int dpt = nvec/32; m = dpt < m ? 
dpt : m; - if( threadIdx.x ==0) - printf("thd:%d %d dots/thrd, nvec = %d blockDim=%d\n",threadIdx.x, sz, nvec, blockDim.x); - __syncthreads(); +// if( threadIdx.x ==0) +// printf("thd:%d %d dots/thrd, nvec = %d blockDim=%d\n",threadIdx.x, sz, nvec, blockDim.x); +// __syncthreads(); int dots = (nvec +m -1)/m; // printf("dots=%d, m=%d, dpt=%d\n", dots, m, dpt); @@ -88,9 +88,9 @@ __global__ void AxB_dot3_phase3_spdn tid < dots; tid += blockDim.x * gridDim.x) { int pair_id, im; - if (threadIdx.x ==0) - printf("thd%u pi=%lld\n",tid, start+threadIdx.x); - __syncthreads(); +// if (threadIdx.x ==0) +// printf("thd%u pi=%lld\n",tid, start+threadIdx.x); +// __syncthreads(); for (pair_id = start+tid, im = 0; im < m && pair_id < end; @@ -101,22 +101,22 @@ __global__ void AxB_dot3_phase3_spdn // TODO: column of Ci / 16? int64_t j = Ci[pair_id] >> 4; // row number of C - printf("tid=%d, i=%lu, j=%lu\n", threadIdx.x, i, j); + //printf("tid=%d, i=%lu, j=%lu\n", threadIdx.x, i, j); - if (threadIdx.x ==0) - printf("thd%u i,j=%lld,%lld\n",tid, i,j); - __syncthreads(); +// if (threadIdx.x ==0) +// printf("thd%u i,j=%lld,%lld\n",tid, i,j); +// __syncthreads(); // Prime row offsets for both A and B - int64_t pA = Ap[j]; // row of C - int64_t pA_end = Ap[j+1]; + int64_t pA = Ap[i]; // row of C + int64_t pA_end = Ap[i+1]; int64_t nnzA = pA_end - pA; - int64_t pB = Bp[i]; // col of C - int64_t pB_end = Bp[i+1]; + int64_t pB = Bp[j]; // col of C + int64_t pB_end = Bp[j+1]; int64_t nnzB = pB_end - pB; T_A aki; T_B bkj; - T_Z cij; + T_C cij; int zombie_count = 0; @@ -132,14 +132,14 @@ __global__ void AxB_dot3_phase3_spdn */ int64_t k = Bi [pB] ; // first row index of B(:,j) // cij = A(k,i) * B(k,j) - GB_GETA ( aki=(T_Z)Ax[pA+k] ) ; // aki = A(k,i) - GB_GETB ( bkj=(T_Z)Bx[pB] ) ; // bkj = B(k,j) + GB_GETA ( aki=(T_C)Ax[pA+k] ) ; // aki = A(k,i) + GB_GETB ( bkj=(T_C)Bx[pB] ) ; // bkj = B(k,j) // TODO: Check tha GB_C_MULT applies the identity automatically since cij has not been initialized 
GB_C_MULT ( cij, aki, bkj ) ; // cij = aki * bkj - printf("A_dense: tid=%d, pair_id=%d, i=%lu, j=%lu, nnzA=%lu, nnzB=%lu, k[B]=%lu, aki=%d, bkj=%d, cij=%d\n", threadIdx.x, pair_id, i, j, nnzA, nnzB, k, aki, bkj, cij); + //printf("A_dense: tid=%d, pair_id=%d, i=%lu, j=%lu, nnzA=%lu, nnzB=%lu, k[B]=%lu, aki=%d, bkj=%d, cij=%d\n", threadIdx.x, pair_id, i, j, nnzA, nnzB, k, aki, bkj, cij); /** * @@ -149,10 +149,10 @@ __global__ void AxB_dot3_phase3_spdn //GB_DOT_TERMINAL (cij) ; // break if cij == terminal int64_t k = Bi [p] ; // next row index of B(:,j) // cij += A(k,i) * B(k,j) - GB_GETA ( aki=(T_Z)Ax[pA+k] ) ; // aki = A(k,i) - GB_GETB ( bkj=(T_Z)Bx[p] ) ; // bkj = B(k,j) + GB_GETA ( aki=(T_C)Ax[pA+k] ) ; // aki = A(k,i) + GB_GETB ( bkj=(T_C)Bx[p] ) ; // bkj = B(k,j) GB_MULTADD ( cij, aki, bkj ) ; // cij += aki * bkj - printf("in_loop: tid=%d, pair_id=%d, i=%lu, j=%lu, nnzA=%lu, nnzB=%lu, k[B]=%lu, aki=%d, bkj=%d, cij=%d\n", threadIdx.x, pair_id, i, j, nnzA, nnzB, k, aki, bkj, cij); + //printf("in_loop: tid=%d, pair_id=%d, i=%lu, j=%lu, nnzA=%lu, nnzB=%lu, k[B]=%lu, aki=%d, bkj=%d, cij=%d\n", threadIdx.x, pair_id, i, j, nnzA, nnzB, k, aki, bkj, cij); } } @@ -160,23 +160,23 @@ __global__ void AxB_dot3_phase3_spdn { int64_t k = Ai [pA] ; // first col index of A(i, :) // cij = A(i,k) * B(j,k) - GB_GETA ( aki=(T_Z)Ax[ pA ] ) ; // aki = A(i,k) + GB_GETA ( aki=(T_C)Ax[ pA ] ) ; // aki = A(i,k) // Jump straight to position in B vector (since we know it's dense) - GB_GETB ( bkj=(T_Z)Bx[ pB+k ] ) ; // bkj = B(k,j) + GB_GETB ( bkj=(T_C)Bx[ pB+k ] ) ; // bkj = B(k,j) GB_C_MULT ( cij, aki, bkj) ; // cij = aki * bkj - printf("B_dense: tid=%d, pair_id=%d, i=%lu, j=%lu, nnzA=%lu, nnzB=%lu, k[B]=%lu, aki=%d, bkj=%d, cij=%d\n", threadIdx.x, pair_id, i, j, nnzA, nnzB, k, aki, bkj, cij); + //printf("B_dense: tid=%d, pair_id=%d, i=%lu, j=%lu, nnzA=%lu, nnzB=%lu, k[B]=%lu, aki=%d, bkj=%d, cij=%d\n", threadIdx.x, pair_id, i, j, nnzA, nnzB, k, aki, bkj, cij); for (int64_t p = pA+1 ; p < 
pA_end ; ++p) { //GB_DOT_TERMINAL (cij) ; // break if cij == terminal int64_t k = Ai [p] ; // next row index of A(:,i) // cij += A(k,i) * B(k,j) - GB_GETA ( aki=(T_Z)Ax[ p ] ) ; // aki = A(i,k) - GB_GETB ( bkj=(T_Z)Bx[ pB+k] ) ; // bkj = B(j,k) + GB_GETA ( aki=(T_C)Ax[ p ] ) ; // aki = A(i,k) + GB_GETB ( bkj=(T_C)Bx[ pB+k] ) ; // bkj = B(j,k) GB_MULTADD ( cij, aki, bkj) ; // cij += aik * bjk - printf("in_loop: tid=%d, pair_id=%d, i=%lu, j=%lu, nnzA=%lu, nnzB=%lu, k[B]=%lu, aki=%d, bkj=%d, cij=%d\n", threadIdx.x, pair_id, i, j, nnzA, nnzB, k, aki, bkj, cij); + //printf("in_loop: tid=%d, pair_id=%d, i=%lu, j=%lu, nnzA=%lu, nnzB=%lu, k[B]=%lu, aki=%d, bkj=%d, cij=%d\n", threadIdx.x, pair_id, i, j, nnzA, nnzB, k, aki, bkj, cij); } } // C(i,j) = A(:,i) * B(:,j) @@ -196,8 +196,8 @@ __global__ void AxB_dot3_phase3_spdn // // cij = A(k,i) * B(k,j) // //// printf("tid=%d, A is dense, k=%ld, i=%ld\n", threadIdx.x, k, i); -// GB_GETA ( aki=(T_Z)Ax[pA + i] ) ; // aki = A(k,i) -// GB_GETB ( bkj=(T_Z)Bx[pB] ) ; // bkj = B(k,j) +// GB_GETA ( aki=(T_C)Ax[pA + i] ) ; // aki = A(k,i) +// GB_GETB ( bkj=(T_C)Bx[pB] ) ; // bkj = B(k,j) // cij = GB_MULT(aki, bkj ) ; // cij = aki * bkj // // } @@ -218,8 +218,8 @@ __global__ void AxB_dot3_phase3_spdn // // cij = A(k,i) * B(k,j) // //// printf("tid=%d, A is dense, k=%ld, i=%ld\n", threadIdx.x, k, i); -// GB_GETA ( aki=(T_Z)Ax[pA + i] ) ; // aki = A(k,i) -// GB_GETB ( bkj=(T_Z)Bx[pB] ) ; // bkj = B(k,j) +// GB_GETA ( aki=(T_C)Ax[pA + i] ) ; // aki = A(k,i) +// GB_GETB ( bkj=(T_C)Bx[pB] ) ; // bkj = B(k,j) // cij = GB_MULT(aki, bkj ) ; // cij = aki * bkj // // for (int64_t p = pB+1 ; p < pB_end ; p++) @@ -227,8 +227,8 @@ __global__ void AxB_dot3_phase3_spdn // //GB_DOT_TERMINAL (cij) ; // break if cij == terminal // int64_t k = Bi [p] ; // next row index of B(:,j) // // cij += A(k,i) * B(k,j) -// GB_GETA ( aki=(T_Z)Ax[A->vlen * i + k] ) ; // aki = A(k,i) -// GB_GETB ( bkj=(T_Z)Bx[p] ) ; // bkj = B(k,j) +// GB_GETA ( aki=(T_C)Ax[A->vlen * i + 
k] ) ; // aki = A(k,i) +// GB_GETB ( bkj=(T_C)Bx[p] ) ; // bkj = B(k,j) // cij = GB_ADD ( cij, GB_MULT(aki, bkj ) ) ; // cij += aki * bkj // } // } @@ -261,8 +261,8 @@ __global__ void AxB_dot3_phase3_spdn // int64_t k = Ai [pA] ; // first row index of A(:,i) //// printf("tid=%d, B is dense, k=%ld, j=%ld\n", threadIdx.x, k, j); // // cij = A(k,i) * B(k,j) -// GB_GETA ( aki= (T_Z)Ax[ pA ] ) ; // aki = A(k,i) -// GB_GETB ( bkj=(T_Z)Bx[ B->vlen*k+j ] ) ; // bkj = B(k,j) +// GB_GETA ( aki= (T_C)Ax[ pA ] ) ; // aki = A(k,i) +// GB_GETB ( bkj=(T_C)Bx[ B->vlen*k+j ] ) ; // bkj = B(k,j) // // cij = GB_MULT(aki, bkj) ; // cij = aki * bkj //// printf("aki=%d, bkj=%d, cij=%d\n", aki, bkj, cij); @@ -272,8 +272,8 @@ __global__ void AxB_dot3_phase3_spdn // //GB_DOT_TERMINAL (cij) ; // break if cij == terminal // int64_t k = Ai [p] ; // next row index of A(:,i) // // cij += A(k,i) * B(k,j) -// GB_GETA ( aki=(T_Z)Ax[ p ] ) ; // aki = A(k,i) -// GB_GETB ( bkj=(T_Z)Bx[ B->vlen*k+j] ) ; // bkj = B(k,j) +// GB_GETA ( aki=(T_C)Ax[ p ] ) ; // aki = A(k,i) +// GB_GETB ( bkj=(T_C)Bx[ B->vlen*k+j] ) ; // bkj = B(k,j) // cij = GB_ADD ( cij, GB_MULT(aki, bkj) ); // cij += aki * bkj //// printf("aki=%d, bkj=%d, cij=%d\n", aki, bkj, cij); // } diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vssp.cu b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vssp.cuh similarity index 96% rename from GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vssp.cu rename to GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vssp.cuh index 106dec76b8..33d651f395 100644 --- a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vssp.cu +++ b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vssp.cuh @@ -30,11 +30,11 @@ // GrB_Matrix M <- mask matrix // GrB_Matrix A <- input matrix A // GrB_Matrix B <- input matrix B +#pragma once #include #include #include -//#include "GB_binary_search.h" #include "matrix.h" // Using tile size fixed at compile time, we don't need shared memory @@ -56,7 +56,7 @@ __device__ T 
reduce_sum(thread_block_tile g, T val) #define intersects_per_thread 8 -template< typename T_C, typename T_A, typename T_B, typename T_X, typename T_Y, typename T_Z> +template< typename T_C, typename T_A, typename T_B> __global__ void AxB_dot3_phase3_vssp ( int64_t start, @@ -111,12 +111,12 @@ __global__ void AxB_dot3_phase3_vssp continue; } - int64_t pA = Ap[j]; - int64_t pA_end = Ap[j+1]; + int64_t pA = Ap[i]; + int64_t pA_end = Ap[i+1]; int64_t nnzA = pA_end - pA; - int64_t pB = B->p[i]; - int64_t pB_end = B->p[i+1]; + int64_t pB = B->p[j]; + int64_t pB_end = B->p[j+1]; int64_t nnzB = pB_end - pB; //Search for each nonzero in the smaller vector to find intersection @@ -124,7 +124,7 @@ __global__ void AxB_dot3_phase3_vssp T_A aki; T_B bkj; - T_Z cij; + T_C cij; if (nnzA <= nnzB) { //---------------------------------------------------------------------- diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vsvs.cu b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vsvs.cuh similarity index 73% rename from GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vsvs.cu rename to GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vsvs.cuh index bf3f431622..698136b6c8 100644 --- a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vsvs.cu +++ b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_vsvs.cuh @@ -19,13 +19,14 @@ // Blocksize is 1024, uses warp and block reductions to count zombies produced. 
//****************************************************************************** + +#pragma once #define GB_CUDA_KERNEL #include #include #include #include #include "matrix.h" -//#include "GB_binary_search.h" using namespace cooperative_groups; @@ -60,6 +61,8 @@ __inline__ __device__ T block_ReduceSum(thread_block g, T val) { static __shared__ T shared[warpSize]; // Shared mem for 32 partial sums + + int lane = threadIdx.x & 31 ; // % warpSize; int wid = threadIdx.x >> 5 ; // / warpSize; thread_block_tile tile = tiled_partition( g ); @@ -70,40 +73,44 @@ T block_ReduceSum(thread_block g, T val) // Wait for all partial reductions if (lane==0) shared[wid]=val; // Write reduced value to shared memory __syncthreads(); // Wait for all partial reductions + for(int i = threadIdx.x; i < warpSize; i+= blockDim.x) { + printf("blockIdx.x=%d, wid=%d, val=%lld\n", blockIdx.x, i, shared[i]); + } - if (wid > 0 || gridDim.x == 1 ) return val; +// if (wid > 0 || gridDim.x == 1 ) return val; //read from shared memory only if that warp existed val = (threadIdx.x < (blockDim.x / warpSize ) ) ? 
shared[lane] : 0; - //printf("thd%d warp loaded val = %d\n", threadIdx.x, lane, val); + printf("thd%d warp loaded val = %d\n", threadIdx.x, lane, val); if (wid==0) val = warp_ReduceSumPlus( tile, val); //Final reduce within first warp return val; } -template< typename T_C, typename T_A, typename T_B, typename T_X, typename T_Y, typename T_Z> +template< typename T_C, typename T_A, typename T_B> __global__ void AxB_dot3_phase3_vsvs ( - const int64_t start, - const int64_t end, - const int64_t *__restrict__ Bucket, - const GrB_Matrix C, - const GrB_Matrix M, - const GrB_Matrix A, - const GrB_Matrix B, - const int sz + int64_t start, + int64_t end, + int64_t *Bucket, + GrB_Matrix C, + GrB_Matrix M, + GrB_Matrix A, + GrB_Matrix B, + int sz ) { +// printf("start=%lu, end=%lu\n", start, end); int dots = end - start; - // sz = expected non-zeros per dot - /* - int m = (gridDim.x*blockDim.x)*256/sz; - int dpt = (nvecs+ gridDim.x*blockDim.x -1)/(gridDim.x*blockDim.x); - m = dpt < m ? dpt : m; - - int dots = (nvecs +m -1)/m; - */ + // sz = expected non-zeros per dot +// /* +// int m = (gridDim.x*blockDim.x)*256/sz; +// int dpt = (nvecs+ gridDim.x*blockDim.x -1)/(gridDim.x*blockDim.x); +// m = dpt < m ? 
dpt : m; +// +// int dots = (nvecs +m -1)/m; +// */ const T_A *__restrict__ Ax = (T_A *)A->x ; const T_B *__restrict__ Bx = (T_B *)B->x ; T_C *__restrict__ Cx = (T_C *)C->x ; @@ -116,14 +123,18 @@ __global__ void AxB_dot3_phase3_vsvs int pfirst, plast; - GB_PARTITION (pfirst, plast, dots, blockIdx.x, gridDim.x ) ; - if( threadIdx.x ==0 ) - { - printf("block%d %d dots/thrd, start,end = %ld,%ld pf,pl=%d,%d blockDim=%d\n", - blockIdx.x, (dots + blockDim.x*gridDim.x -1)/(blockDim.x*gridDim.x), - start, end, pfirst, plast, blockDim.x); - } - __syncthreads(); + //#define GB_PARTITION(k1,k2,n,tid,nthreads) \ + + GB_PARTITION (pfirst, plast, dots, blockIdx.x, gridDim.x ) ; +// if( threadIdx.x ==0 ) +// { +// if( threadIdx.x ==0 ) +// { +// printf("block%d %d dots/thrd, start,end = %ld,%ld pf,pl=%d,%d blockDim=%d\n", +// blockIdx.x, (dots + blockDim.x*gridDim.x -1)/(blockDim.x*gridDim.x), +// start, end, pfirst, plast, blockDim.x); +// } +// __syncthreads(); int zc = 0 ; @@ -137,24 +148,24 @@ __global__ void AxB_dot3_phase3_vsvs tid < plast; tid += blockDim.x ) { - - pair_id = Bucket[ start + tid ]; + pair_id = Bucket[ start + tid ]; int64_t i = Mi [pair_id] ; int64_t j = Ci [pair_id]>>4 ; - + if (j < 0) continue; //don't operate on zombies + printf("start=%d, tid=%d, pair_id=%lu, (i,j)=%lu,%lu\n", pfirst, tid, pair_id,i,j); int64_t pA = Ap[i] ; int64_t pA_end = Ap[i+1] ; - int64_t pB = Bp[j] ; - int64_t pB_end = Bp[j+1] ; + int64_t pB = Bp[j] ; + int64_t pB_end = Bp[j+1] ; T_A aki; T_B bkj; - T_Z cij ; + T_C cij ; bool cij_exists = false; - while (pA < pA_end && pB < pB_end) + while (pA < pA_end && pB < pB_end ) { int64_t ia = Ai [pA] ; int64_t ib = Bi [pB] ; @@ -184,7 +195,7 @@ __global__ void AxB_dot3_phase3_vsvs GB_PUTC ( Cx[pair_id] = (T_C)cij ) ; } else{ - //printf(" %lld, %lld is zombie %d!\n",i,j,zc); + printf(" %lld, %lld is zombie %d!\n",i,j,zc); zc++; GB_PUTC( Ci[pair_id] = GB_FLIP( i ) ) ; } @@ -192,15 +203,14 @@ __global__ void AxB_dot3_phase3_vsvs 
__syncthreads(); - //printf("thd%d zombie count = %d\n",threadIdx.x,zc); - zc = block_ReduceSum( this_thread_block(), zc); + printf("thd%d zombie count = %d\n",threadIdx.x,zc); + zc = block_ReduceSum( this_thread_block(), zc); __syncthreads(); - if( threadIdx.x == 0 && zc > 0) { - //printf("block%d zombie count = %d\n", blockIdx.x, zc); + printf("block%d zombie count = %d\n", blockIdx.x, zc); atomicAdd( (unsigned long long int*)&(C->nzombies), (unsigned long long int)zc); - //C->nzombies += (unsigned long long int)zc; - //printf("blk:%d Czombie = %lld\n", blockIdx.x,C->nzombies); +// C->nzombies += (unsigned long long int)zc; + printf("blk:%d Czombie = %lld\n", blockIdx.x,C->nzombies); } - + } diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_warpix.cu b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_warpix.cuh similarity index 98% rename from GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_warpix.cu rename to GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_warpix.cuh index 03a0cf8b06..2386148cc4 100644 --- a/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_warpix.cu +++ b/GraphBLAS/CUDA/templates/GB_jit_AxB_dot3_phase3_warpix.cuh @@ -30,6 +30,8 @@ // matrix *M <- mask matrix // matrix *A <- input matrix A // matrix *B <- input matrix B + +#pragma once #define GB_CUDA_KERNEL #include #include @@ -70,7 +72,7 @@ T reduce_plus(thread_block_tile g, T val) #define intersects_per_thread 8 -template< typename T_C, typename T_A, typename T_B, typename T_X, typename T_Y, typename T_Z> +template< typename T_C, typename T_A, typename T_B> __global__ void AxB_dot3_phase3_warpix ( int64_t start, @@ -83,7 +85,6 @@ __global__ void AxB_dot3_phase3_warpix int sz ) { - T_A *__restrict__ Ax = (T_A*)A->x; T_B *__restrict__ Bx = (T_B*)B->x; T_C *__restrict__ Cx = (T_C*)C->x; @@ -373,9 +374,9 @@ __global__ void AxB_dot3_phase3_warpix if( zc > 0) { - //printf("warp %d zombie count = %d\n", blockIdx.x, zc); + printf("warp %d zombie count = %d\n", blockIdx.x, zc); atomicAdd( 
(unsigned long long int*)&(C->nzombies), (unsigned long long int)zc); - //printf("blk:%d Czombie = %lld\n",blockIdx.x,C->zombies); + printf("blk:%d Czombie = %lld\n",blockIdx.x,C->zombies); } } diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_phase1.cu b/GraphBLAS/CUDA/templates/GB_jit_AxB_phase1.cuh similarity index 86% rename from GraphBLAS/CUDA/templates/GB_jit_AxB_phase1.cu rename to GraphBLAS/CUDA/templates/GB_jit_AxB_phase1.cuh index 99c5555443..e49b97dc66 100644 --- a/GraphBLAS/CUDA/templates/GB_jit_AxB_phase1.cu +++ b/GraphBLAS/CUDA/templates/GB_jit_AxB_phase1.cuh @@ -6,13 +6,13 @@ // This kernel scans the non-zero pattern in A and B, takes into account the // mask and computes total work required to form C. Then it classifies each // dot product into a set of buckets for efficient compute. +#pragma once #define GB_CUDA_KERNEL #include -//#include #include "matrix.h" #include "GB_cuda_buckets.h" -#include "local_cub/block/block_scan.cuh" +#include //------------------------------------------------------------------------------ // GB_bucket_assignment @@ -58,7 +58,7 @@ __device__ static inline GB_bucket_code GB_bucket_assignment // entry in A(:,i) comes before the first entry in B(:,j), or visa // versa, then there is no work to do since C(i,j) must be a zombie. - // GB_BUCKET (ia_last < ib_first || ib_last < ia_first, GB_BUCKET_ZOMBIE); + //GB_BUCKET (ia_last < ib_first || ib_last < ia_first, GB_BUCKET_ZOMBIE); } // else if (bjnz == vlen && ainz == vlen && vlen > 256) @@ -205,7 +205,7 @@ __device__ static inline GB_bucket_code GB_bucket_assignment // The kernel also computes Ci, of size nnz(C), which contains the // zombie assignment or bucket assignment for non-zombies in C. 
-template +template __global__ void AxB_phase1 ( // outputs, preallocated in global memory: @@ -310,21 +310,55 @@ __global__ void AxB_phase1 __shared__ int64_t Mps[pointerchunk]; __shared__ int64_t ks [chunksize]; + __syncthreads(); + if (threadIdx.x==0 && blockIdx.x == 0) + { +// printf ("Here in phase1, what I see is this:\n") ; +// printf ("MX(pM) is: %s\n", GB_XSTR (MX (pM))) ; +// printf ("GB_MULT(x,y) is: %s\n", GB_XSTR (GB_MULT (x,y))) ; +// printf ("GB_ADD(x,y) is: %s\n", GB_XSTR (GB_ADD (x,y))) ; + // #define GB_GETA(blob) + // #define GB_GETB(blob) + // #define GB_MULT(x,y) (1) + // #define GB_ADD(x,y) ((x) + (y)) + // #define GB_IDENTITY (0) + // #define GB_TERMINAL_CONDITION(cij) (false) + // #define GB_IF_TERMINAL_BREAK + // #define GB_PUTC(blob) blob + // #define GB_MTYPE void + // #define MX(p) true + // #define GB_MASK_COMP false + // #define GB_NO_MASK false + // #define GB_C_IS_SPARSE 1 + // #define GB_C_IS_HYPER 0 + // #define GB_C_IS_BITMAP 0 + // #define GB_C_IS_FULL 0 + // #define GB_M_IS_SPARSE 1 + // #define GB_M_IS_HYPER 0 + // #define GB_M_IS_BITMAP 0 + // #define GB_M_IS_FULL 0 + // #define GB_A_IS_SPARSE 1 + // #define GB_A_IS_HYPER 0 + // #define GB_A_IS_BITMAP 0 + // #define GB_A_IS_FULL 0 + // #define GB_B_IS_SPARSE 1 + // #define GB_B_IS_HYPER 0 + // #define GB_B_IS_BITMAP 0 + // #define GB_B_IS_FULL 0 + } + __syncthreads(); + //-------------------------------------------------------------------------- // compute the task descriptor //-------------------------------------------------------------------------- // all threads in this block will compute the same values for these: - int32_t pfirst, plast, kfirst, klast ; - /* - for ( int tid_global = threadIdx.x + blockIdx.x * blockDim.x ; - tid_global < (mnvec+ 7)/8 ; - tid_global += blockDim.x*gridDim.x) - */ - int chunk_max= (mnz + chunksize -1)/chunksize; - for ( int chunk = blockIdx.x; - chunk < chunk_max; - chunk += gridDim.x ) + int64_t pfirst, plast, kfirst, klast ; + + int64_t 
chunk_max= (mnz + chunksize -1)/chunksize; + for ( int64_t chunk = blockIdx.x; + chunk < chunk_max; + chunk += gridDim.x ) { // The slice for each task contains entries pfirst:plast-1 of M and C. @@ -332,12 +366,12 @@ __global__ void AxB_phase1 pfirst = chunksize * chunk ; plast = GB_IMIN( chunksize * (chunk+1), mnz ) ; - int chunk_end; + int64_t chunk_end; if ( mnz > chunksize) chunk_end = GB_IMIN( chunksize, mnz - chunksize*(chunk) ) ; else chunk_end = mnz; - // find the first vector of the slice for task tid_global: the + // find the first vector of the slice for this chunk: the // vector that owns the entry Ai [pfirst] and Ax [pfirst]. kfirst = GB_search_for_vector_device (pfirst, Mp, 0, mnvec, mvlen) ; //if( pfirst ==0) kfirst = 0; @@ -346,27 +380,36 @@ __global__ void AxB_phase1 // vector that owns the entry Ai [plast-1] and Ax [plast-1]. klast = GB_search_for_vector_device (plast-1, Mp, kfirst, mnvec, mvlen) ; - int k_end = GB_IMIN( pointerchunk , klast - kfirst +2 ) ; - /* - if( threadIdx.x ==0) - { - printf("chunk%d pfirst,plast,ch_end =%d,%d,%d kfirst,klast,kend = %d,%d,%d\n", - chunk, pfirst, plast, chunk_end, kfirst, klast, k_end ) ; - } + int64_t k_end = GB_IMIN( pointerchunk , klast - kfirst +2 ) ; + +// if( threadIdx.x ==0) +// { +// printf("chunk%ld pfirst,plast,ch_end =%ld,%ld,%ld kfirst,klast,kend = %ld,%ld,%ld\n", +// chunk, pfirst, plast, chunk_end, kfirst, klast, k_end ) ; +// } __syncthreads(); - */ + // load pointer values for this chunk - for ( int i = threadIdx.x; i< k_end; i+= blockDim.x) + for ( int64_t i = threadIdx.x; i< k_end; i+= blockDim.x) { Mps[i] = Mp[i + kfirst]; } __syncthreads(); + if (threadIdx.x == 0) + { +// for (int64_t i = 0 ; i < k_end ; i++) +// { +// printf ("Mps [%d] = %ld\n", i, Mps [i]) ; +// } + } + __syncthreads(); + // search for k values for each entry float slope = (float)(mnvec)/(float)(mnz* chunksize) ; - for ( int i = threadIdx.x; i< chunk_end; i+= blockDim.x) + for ( int64_t i = threadIdx.x; i< chunk_end; 
i+= blockDim.x) { ks[i] = kfirst + slope*( float )(i); while ( Mps[ ks[i] - kfirst + 1 ] <= (i+pfirst) ) @@ -375,6 +418,14 @@ __global__ void AxB_phase1 ks[i]--; } __syncthreads(); + if (threadIdx.x == 0) + { +// for (int64_t i = 0 ; i < chunksize ; i++) +// { +// printf ("ks [%d] = %ld\n", i, ks [i]) ; +// } + } + __syncthreads(); //ASSERT (0 <= kfirst && kfirst <= klast && klast < mnvec) ; @@ -385,8 +436,6 @@ __global__ void AxB_phase1 } __syncthreads(); */ - - //-------------------------------------------------------------------------- // assign entries in C(i,j) to the buckets @@ -403,19 +452,18 @@ __global__ void AxB_phase1 //int32_t pM_start, pM_end ; //for (int64_t pM = pfirst + threadIdx.x ; pM < plast ; pM += blockDim.x) - int32_t i,j; - int32_t k = kfirst ; //for (int64_t pM = pfirst; pM < plast; pM++ ) - for ( int pM = pfirst + threadIdx.x; - pM < pfirst + chunk_end; - pM += blockDim.x ) + for ( int64_t pM = pfirst + threadIdx.x; + pM < pfirst + chunk_end; + pM += blockDim.x ) { GB_bucket_code bucket = GB_BUCKET_ZOMBIE ; - k = ks[ pM - pfirst ] ; + int64_t k = ks[ pM - pfirst ] ; //k += ( pM == Mp[k+1] ) ; - //printf ("tid%d k %ld pM %ld\n", tid_global, k, pM; - i = Mi [ pM ] ; +// printf ("tid%d k %ld pM %ld MX(pM): %d\n", threadIdx.x, k, pM, MX (pM)); + int64_t i = Mi [ pM ] ; +int64_t j = k ; // HACK, does not need to be initialized here if ( MX ( pM ) ) { @@ -466,7 +514,9 @@ pA_end = Ap [i+1] ; //------------------------------------------------------ //bucket = GB_BUCKET_MERGEPATH ; - bucket= GB_bucket_assignment ( ainz, bjnz, bvlen) ; + bucket= GB_bucket_assignment ( ainz, bjnz, bvlen) ; +// printf ("tid%d i %ld j %ld ainz %ld bjnz %ld: bucket %d\n", +// threadIdx.x, i, j, ainz, bjnz, (int) bucket) ; } } } @@ -474,7 +524,7 @@ pA_end = Ap [i+1] ; if (bucket == GB_BUCKET_ZOMBIE) { // mark C(i,j) is a zombie - //printf ("tid%d pM=%d %d,%d prezombie\n",threadIdx.x,pM,i,j) ; +// printf ("tid%d pM=%d %d,%d prezombie\n",threadIdx.x,pM,i,j) ; Ci [pM] = 
GB_FLIP (i) << 4 ; // GB_BUCKET_COUNT (GB_BUCKET_ZOMBIE) ; my_bucket_0++ ; //0 is the zombie bucket @@ -484,7 +534,7 @@ pA_end = Ap [i+1] ; // place C(i,j) in its bucket Ci [pM] = (k << 4) + bucket ; GB_BUCKET_COUNT (bucket) ; - //printf ("tid%d pM=%d %d,%d b=%d\n",threadIdx.x, pM, i,j, (int)bucket) ; +// printf ("tid%d pM=%d %d,%d b=%d\n",threadIdx.x, pM, i,j, (int)bucket) ; } } @@ -508,26 +558,18 @@ pA_end = Ap [i+1] ; nanobuckets + blockIdx.x * (NBUCKETS * blockDim.x) + threadIdx.x ; #define CUMSUM_AND_STORE_NANOBUCKET(bucket) \ - BlockCumSum(temp_storage).ExclusiveSum \ - ( my_bucket_ ## bucket, my_bucket_ ## bucket) ; \ - __syncthreads(); \ if( threadIdx.x == blockDim.x-1) \ blockbucket [blockIdx.x + bucket * gridDim.x] = \ my_bucket_ ## bucket ; \ + BlockCumSum(temp_storage).ExclusiveSum \ + ( my_bucket_ ## bucket, my_bucket_ ## bucket) ; \ + __syncthreads(); \ nanobucket [bucket * blockDim.x] = my_bucket_ ## bucket ; CUMSUM_AND_STORE_NANOBUCKET (0) ; CUMSUM_AND_STORE_NANOBUCKET (1) ; CUMSUM_AND_STORE_NANOBUCKET (2) ; -// CUMSUM_AND_STORE_NANOBUCKET (3) ; - if( threadIdx.x == blockDim.x-1) - blockbucket [blockIdx.x + 3 * gridDim.x] = - my_bucket_3 ; - BlockCumSum(temp_storage).ExclusiveSum - ( my_bucket_3, my_bucket_3) ; - __syncthreads(); \ - nanobucket [3 * blockDim.x] = my_bucket_3 ; - + CUMSUM_AND_STORE_NANOBUCKET (3) ; CUMSUM_AND_STORE_NANOBUCKET (4) ; CUMSUM_AND_STORE_NANOBUCKET (5) ; CUMSUM_AND_STORE_NANOBUCKET (6) ; @@ -538,7 +580,7 @@ pA_end = Ap [i+1] ; CUMSUM_AND_STORE_NANOBUCKET (11) ; /* - if(threadIdx.x +blockIdx.x*blockDim.x <= mnvec) //blockDim.x -1){ + if(threadIdx.x +blockIdx.x*blockDim.x <= mnvec) //blockDim.x -1) { printf("thd %d blk%d nbucket0 has %ld prev\n",threadIdx.x, blockIdx.x, nanobucket[0]); printf("thd %d blk%d nbucket1 has %ld prev\n",threadIdx.x, blockIdx.x, nanobucket[1*blockDim.x]); @@ -563,7 +605,7 @@ pA_end = Ap [i+1] ; // Note that this write to global memory is not coalesced. 
#define STORE_GLOBAL_BUCKET_COUNT(bucket) \ - blockbucket [blockIdx.x + bucket * gridDim.x] += \ + blockbucket [bucket * gridDim.x + blockIdx.x] += \ my_bucket_ ## bucket ; if (threadIdx.x == blockDim.x - 1 ) diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2.cu b/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2.cu deleted file mode 100644 index 55b6b6b44a..0000000000 --- a/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2.cu +++ /dev/null @@ -1,442 +0,0 @@ -//------------------------------------------------------------------------------ -// templates/GB_AxB_cuda_dot3_phase2: fill the global buckets -//------------------------------------------------------------------------------ - -// TODO describe me - -#define GB_CUDA_KERNEL - - -//#include -#include "GB_cuda_buckets.h" -#include "matrix.h" -#include -#include "local_cub/block/block_scan.cuh" - -using namespace cooperative_groups; - -// A stateful callback functor that maintains a running prefix to be applied -// during consecutive scan operations. -struct BlockPrefixCallbackOp -{ - // Running prefix - int64_t running_total; - // Constructor - __device__ BlockPrefixCallbackOp(int64_t running_total) : running_total(running_total) {} - - // Callback operator to be entered by the first warp of threads in the block. - // Thread-0 is responsible for returning a value for seeding the block-wide scan. 
- __device__ int64_t operator()(int64_t block_aggregate) - { - int64_t old_prefix = running_total; - running_total += block_aggregate; - return old_prefix; - } -}; - -__inline__ -__device__ void blockBucketExclusiveSum(int bucketId, int64_t *d_data, int nblocks) -{ - #define blocksize 32 - - // Specialize BlockScan for a 1D block of 32 threads - typedef cub::BlockScan BlockScan; - - // Allocate shared memory for BlockScan - __shared__ typename BlockScan::TempStorage temp_storage; - - // Initialize running total - BlockPrefixCallbackOp prefix_op(0); - - // Have the block iterate over segments of items - int64_t data=0; - - int64_t *blockbucket= d_data; - - for (int block_id = 0; block_id < nblocks; block_id += blocksize) - { - // Load a segment of consecutive items that are blocked across threads - - //printf("block %d entering sum\n",blockIdx.x); - int loc = block_id + threadIdx.x; - if ( loc < nblocks) - { - //printf("block %di loading tid=%d\n",block_id,tid); - data = blockbucket[bucketId*nblocks +loc ] ; - } - __syncthreads(); - - //printf("bb%d_%d s0 before prefix= %ld \n", block_id,bucketId, - // blockbucket[bucketId*nblocks + block_id+threadIdx.x] ) ; - // Collectively compute the block-wide exclusive prefix sum - BlockScan(temp_storage).ExclusiveSum( data, data, prefix_op); - __syncthreads(); - - if ( loc < nblocks) - { - blockbucket[bucketId*nblocks +loc ] = data ; - } - __syncthreads(); - - //printf("bb%d_%d = %ld \n", block_id, bucketId, blockbucket[bucketId*nblocks+block_id+threadIdx.x] ) ; - - data = 0; - } -} - - -template< typename T, int tile_sz> -__inline__ __device__ -T warp_ReduceSumPlus( thread_block_tile tile, T val) -{ - // Each iteration halves the number of active threads - // Each thread adds its partial sum[i] to sum[lane+i] - for (int i = tile.size() / 2; i > 0; i /= 2) { - val += tile.shfl_down( val, i); - } - return val; // note: only thread 0 will return full sum -} - -template -__inline__ __device__ -T block_ReduceSum(thread_block g, T 
val) -{ - static __shared__ T shared[warpSize]; // Shared mem for 32 partial sums - int lane = threadIdx.x % warpSize; - int wid = threadIdx.x / warpSize; - thread_block_tile tile = tiled_partition( g ); - - // Each warp performs partial reduction - val = warp_ReduceSumPlus( tile, val); - - // Wait for all partial reductions - if (lane==0) { - //printf("thd%d warp%d sum is %d\n", threadIdx.x, wid, val); - shared[wid]=val; // Write reduced value to shared memory - //printf("thd%d stored warp %d sum %d\n", threadIdx.x, wid, val); - } - __syncthreads(); // Wait for all partial reductions - - if (wid > 0 ) return val ; - //Final reduce within first warp - if (wid==0) val = warp_ReduceSumPlus( tile, val) ; - - return val; -} - -// GB_AxB_cuda_dot3_phase2 is a CUDA kernel that takes as input the -// nanobuckets and blockbucket arrays computed by the first phase kernel, -// GB_AxB_cuda_dot3_phase1. The launch geometry of this kernel must match the -// GB_AxB_cuda_dot3_phase1 kernel, with the same # of threads and threadblocks. 
- -__global__ -void simple_nongrb_test(int i) { -} - -__global__ -void simple_grb_test(GrB_Matrix C) { -} - -__global__ -void AxB_phase2 -( - // input, not modified: - int64_t *__restrict__ nanobuckets, // array of size 12-blockDim.x-by-nblocks - int64_t *__restrict__ blockbucket, // global bucket count, of size 12*nblocks - // output: - int64_t *__restrict__ bucketp, // global bucket cumsum, of size 13 - int64_t *__restrict__ bucket, // global buckets, of size cnz (== mnz) - int64_t *__restrict__ offset, // global offsets, for each bucket - // inputs, not modified: - const int nblocks // input number of blocks to reduce -) -{ - //printf("In AxB_phase2 kernel\n"); - //printf("nanobuckets: %ld\n", nanobuckets[0]); - - //-------------------------------------------------------------------------- - // sum up the bucket counts of prior threadblocks - //-------------------------------------------------------------------------- - - // blockbucket is an array of size 12-by-nblocks, held by row. The - // entry blockbucket [bucket * nblocks + t] holds the # of entries - // in the bucket (in range 0 to 11) found by threadblock t. 
- - - //__shared__ uint64_t offset [12] ; - uint64_t s_0=0; - uint64_t s_1=0; - uint64_t s_2=0; - uint64_t s_3=0; - uint64_t s_4=0; - uint64_t s_5=0; - uint64_t s_6=0; - uint64_t s_7=0; - uint64_t s_8=0; - uint64_t s_9=0; - uint64_t s_10=0; - uint64_t s_11=0; - - thread_block_tile<32> tile = tiled_partition<32>(this_thread_block() ); - - //printf("block %d entering sum\n",blockIdx.x); - int tid = threadIdx.x + blockIdx.x*blockDim.x; - #define reduceBucket( B ) \ - for( tid = threadIdx.x + blockIdx.x*blockDim.x; \ - tid < nblocks; \ - tid += blockDim.x*gridDim.x) \ - { \ - s_ ## B += blockbucket[ B *nblocks +tid] ; \ - } \ - __syncthreads(); \ - s_ ## B = warp_ReduceSumPlus( tile, s_ ## B); - - reduceBucket( 0 ) - reduceBucket( 1 ) - reduceBucket( 2 ) - reduceBucket( 3 ) - reduceBucket( 4 ) - reduceBucket( 5 ) - reduceBucket( 6 ) - reduceBucket( 7 ) - reduceBucket( 8 ) - reduceBucket( 9 ) - reduceBucket( 10 ) - reduceBucket( 11 ) - - - //printf("summing blk,tid=%d,%d\n",blockIdx.x,threadIdx.x); - if (threadIdx.x ==0 ) - { - atomicAdd( (unsigned long long int*)&(offset[0]), s_0); - atomicAdd( (unsigned long long int*)&(offset[1]), s_1); - atomicAdd( (unsigned long long int*)&(offset[2]), s_2); - atomicAdd( (unsigned long long int*)&(offset[3]), s_3); - atomicAdd( (unsigned long long int*)&(offset[4]), s_4); - atomicAdd( (unsigned long long int*)&(offset[5]), s_5); - atomicAdd( (unsigned long long int*)&(offset[6]), s_6); - atomicAdd( (unsigned long long int*)&(offset[7]), s_7); - atomicAdd( (unsigned long long int*)&(offset[8]), s_8); - atomicAdd( (unsigned long long int*)&(offset[9]), s_9); - atomicAdd( (unsigned long long int*)&(offset[10]),s_10); - atomicAdd( (unsigned long long int*)&(offset[11]),s_11); - } - __syncthreads(); - - - - if( gridDim.x >= 12) - { - // Cumulative sum across blocks for each bucket - if (blockIdx.x <12) - blockBucketExclusiveSum( blockIdx.x, blockbucket, nblocks ) ; - } - else - { - if (blockIdx.x == 0) - { - blockBucketExclusiveSum( 0, 
blockbucket, nblocks ) ; - blockBucketExclusiveSum( 1, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 2, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 3, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 4, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 5, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 6, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 7, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 8, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 9, blockbucket, nblocks ) ; - blockBucketExclusiveSum( 10, blockbucket, nblocks) ; - blockBucketExclusiveSum( 11, blockbucket, nblocks) ; - } - } - - - - - //-------------------------------------------------------------------------- - // last threadblock saves the cumsum of the 12 global buckets - //-------------------------------------------------------------------------- - /* do on cpu - if (blockIdx.x == 0) // gridDim.x - 1) - { - - // the last threadblock: compute all 12 global bucket sizes, and its - // cumulative sum - if (threadIdx.x == 0) - { - // the work in this last threadblock is single-threaded - uint64_t s = 0; - for (int bucket = 0 ; bucket < 12 ; bucket++) - { - // write the global cumsum of all buckets to the final global - // bucketp. bucketp [bucket] is the starting position in - // the bucket. - bucketp [bucket] = s ; - - // bucket_size is the total # of entries in this bucket, for - // all threadblocks. It has nearly been computed already, - // since offset [bucket] = sum (blockbucket (bucket,0:blockDim.x-1)). - // All that is left is to add the counts for the last threadblock.` - //int64_t global_bucket_size = offset [bucket]; - // + blockbucket [bucket * gridDim.x + blockIdx.x] ; - - //printf("bucketp[%d]= %ld\n",bucket, s); - // s is a cumulative sum of the global bucket sizes - s += offset[bucket]; // global_bucket_size ; - } - // The kth global bucket (for k = 0 to 11) appears in: - // bucket [bucketp [k]... 
bucketp [k+1]-1], - // so the end of the last bucket needs bucketp [12]. - bucketp [12] = (int64_t)s; - //printf("bucketp[12]= %ld\n", s); - // all entries in C now appear in the buckets. - // ASSERT (s == cnz) ; - } - __syncthreads ( ) ; - } - */ - -} // phase2 - - -__global__ -void AxB_phase2end -( - // input, not modified: - int64_t *__restrict__ nanobuckets, // array of size 12-blockDim.x-by-nblocks - const int64_t *__restrict__ blockbucket, // global bucket count, of size 12*nblocks - // output: - const int64_t *__restrict__ bucketp, // global bucket cumsum, of size 13 - int64_t *__restrict__ bucket, // global buckets, of size cnz (== mnz) - const int64_t *__restrict__ offset, // global offsets, for each bucket - // inputs, not modified: - const GrB_Matrix C, // output matrix - const int64_t cnz // number of entries in C and M -) -{ - - //-------------------------------------------------------------------------- - // get C and M - //-------------------------------------------------------------------------- - - // Ci [p] for an entry C(i,j) contains either GB_FLIP(i) if C(i,j) is a - // zombie, or (k << 4) + bucket otherwise, where C(:,j) is the kth vector - // of C (j = Ch [k] if hypersparse or j = k if standard sparse), and - // where bucket is the bucket assignment for C(i,j). This phase does not - // need k, just the bucket for each entry C(i,j). - - int64_t *__restrict__ Ci = C->i ; // for zombies, or bucket assignment - int64_t *__restrict__ Mp = C->p ; // for offset calculations - int64_t mnvec = C->nvec; - - //-------------------------------------------------------------------------- - // load and shift the nanobuckets for this thread block - //-------------------------------------------------------------------------- - - // The taskbucket for this threadblock is an array of size - // 12-by-blockDim.x, held by row. It forms a 2D array within the 3D - // nanobuckets array. 
- int64_t *__restrict__ taskbucket = nanobuckets + blockIdx.x * (12 * blockDim.x) ; - - //printf("block%d thd%d blockbucket= %ld\n", blockIdx.x, threadIdx.x, - // blockbucket[blockIdx.x*gridDim.x+blockIdx.x]); - - // Each thread in this threadblock owns one column of this taskbucket, for - // its set of 12 nanobuckets. The nanobuckets are a column of length 12, - // with stride equal to blockDim.x. - int64_t *__restrict__ nanobucket = taskbucket + threadIdx.x; - - // Each thread loads its 12 nanobucket values into registers. - #define LOAD_NANOBUCKET(bucket) \ - int64_t my_bucket_ ## bucket = \ - nanobucket [bucket * blockDim.x] \ - + blockbucket [bucket * gridDim.x + blockIdx.x]\ - + bucketp [bucket] ; - - LOAD_NANOBUCKET (0) ; - LOAD_NANOBUCKET (1) ; - LOAD_NANOBUCKET (2) ; - LOAD_NANOBUCKET (3) ; - LOAD_NANOBUCKET (4) ; - LOAD_NANOBUCKET (5) ; - LOAD_NANOBUCKET (6) ; - LOAD_NANOBUCKET (7) ; - LOAD_NANOBUCKET (8) ; - LOAD_NANOBUCKET (9) ; - LOAD_NANOBUCKET (10) ; - LOAD_NANOBUCKET (11) ; - - // Now each thread has an index into the global set of 12 buckets, - // held in bucket, of where to place its own entries. - - //-------------------------------------------------------------------------- - // construct the global buckets - //-------------------------------------------------------------------------- - - // The slice for task blockIdx.x contains entries pfirst:plast-1 of M and - // C, which is the part of C operated on by this threadblock. 
- int64_t pfirst, plast ; - - /* - for ( int tid_global = threadIdx.x + blockIdx.x * blockDim.x ; - tid_global < (mnvec+7)/8 ; - tid_global += blockDim.x * gridDim.x) - */ - int chunk_max= (cnz + chunksize -1)/chunksize; - for ( int chunk = blockIdx.x; - chunk < chunk_max; - chunk += gridDim.x ) - { - - //GB_PARTITION (pfirst, plast, cnz, tid_global, (mnvec+7)/8 ) ; - pfirst = chunksize * chunk ; - plast = GB_IMIN( chunksize * (chunk+1), cnz ) ; - - int chunk_end; - if ( cnz > chunksize) chunk_end = GB_IMIN( chunksize, - cnz - chunksize*(chunk) ); - else chunk_end = cnz; - - // find the first vector of the slice for task blockIdx.x: the - // vector that owns the entry Ai [pfirst] and Ax [pfirst]. - //kfirst = GB_search_for_vector_device (pfirst, Mp, 0, mnvec) ; - - // find the last vector of the slice for task blockIdx.x: the - // vector that owns the entry Ai [plast-1] and Ax [plast-1]. - //klast = GB_search_for_vector_device (plast-1, Mp, kfirst, mnvec) ; - - - for ( int p = pfirst + threadIdx.x; - p < pfirst + chunk_end; - p += blockDim.x ) - { - // get the entry C(i,j), and extract its bucket. Then - // place the entry C(i,j) in the global bucket it belongs to. - - // TODO: these writes to global are not coalesced. Instead: each - // threadblock could buffer its writes to 12 buffers and when the - // buffers are full they can be written to global. 
-            int ibucket = Ci[p] & 0xF;
-            //printf(" thd: %d p,Ci[p] = %ld,%ld,%d\n", threadIdx.x, p, Ci[p], irow );
-            switch (ibucket)
-            {
-                case 0: bucket [my_bucket_0++ ] = p ; Ci[p] = Ci[p] >>4; break ; //unshift zombies
-                case 1: bucket [my_bucket_1++ ] = p ; break ;
-                case 2: bucket [my_bucket_2++ ] = p ; break ;
-                case 3: bucket [my_bucket_3++ ] = p ; break ;
-                case 4: bucket [my_bucket_4++ ] = p ; break ;
-                case 5: bucket [my_bucket_5++ ] = p ; break ;
-                case 6: bucket [my_bucket_6++ ] = p ; break ;
-                case 7: bucket [my_bucket_7++ ] = p ; break ;
-                case 8: bucket [my_bucket_8++ ] = p ; break ;
-                case 9: bucket [my_bucket_9++ ] = p ; break ;
-                case 10: bucket [my_bucket_10++] = p ; break ;
-                case 11: bucket [my_bucket_11++] = p ; break ;
-                default: break;
-            }
-
-        }
-        //__syncthreads();
-    }
-}
-
diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2.cuh b/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2.cuh
new file mode 100644
index 0000000000..f052e18b29
--- /dev/null
+++ b/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2.cuh
@@ -0,0 +1,248 @@
+//------------------------------------------------------------------------------
+// templates/GB_AxB_cuda_dot3_phase2: fill the global buckets
+//------------------------------------------------------------------------------
+
+// AxB_phase2 sums the per-threadblock bucket counts in blockbucket into the 12
+// global bucket totals in offset, then replaces each row of blockbucket with
+// its exclusive prefix sum.
+#pragma once
+
+#define GB_CUDA_KERNEL
+
+#include "GB_cuda_buckets.h"
+#include "matrix.h"
+// NOTE(review): the names of the next two headers were reconstructed from what
+// this file uses (namespace cooperative_groups, cub::BlockScan) -- confirm.
+#include <cooperative_groups.h>
+#include <cub/block/block_scan.cuh>
+
+using namespace cooperative_groups;
+
+// A stateful callback functor that maintains a running prefix to be applied
+// during consecutive scan operations.
+struct BlockPrefixCallbackOp
+{
+   // Running prefix
+   int64_t running_total;
+   // Constructor
+   __device__ BlockPrefixCallbackOp(int64_t running_total) : running_total(running_total) {}
+
+   // Callback operator to be entered by the first warp of threads in the block.
+   // Thread-0 is responsible for returning a value for seeding the block-wide scan.
+   __device__ int64_t operator()(int64_t block_aggregate)
+   {
+     int64_t old_prefix = running_total;
+     running_total += block_aggregate;
+     return old_prefix;
+   }
+};
+
+// Replace row bucketId of the 12-by-nblocks array d_data, that is the entries
+// d_data [bucketId*nblocks ... bucketId*nblocks + nblocks-1], with its
+// exclusive prefix sum, in place.  The row is scanned in segments of blocksize
+// entries; BlockPrefixCallbackOp carries the running total between segments.
+// Every thread of the threadblock must make the call (it uses __syncthreads).
+// NOTE(review): BlockScan is specialized for blocksize (32) threads, so this
+// presumably requires blockDim.x == 32 -- confirm against the launch code.
+__inline__
+__device__ void blockBucketExclusiveSum(int bucketId, int64_t *d_data, int nblocks)
+{
+   #define blocksize 32
+
+   // Specialize BlockScan for a 1D block of 32 threads
+   typedef cub::BlockScan<int64_t, blocksize> BlockScan;
+
+   // Allocate shared memory for BlockScan
+   __shared__ typename BlockScan::TempStorage temp_storage;
+
+   // Initialize running total
+   BlockPrefixCallbackOp prefix_op(0);
+
+   // Have the block iterate over segments of items
+   int64_t data=0;
+
+   int64_t *blockbucket= d_data;
+
+   for (int block_id = 0; block_id < nblocks; block_id += blocksize)
+   {
+      // Load a segment of consecutive items that are blocked across threads
+      int loc = block_id + threadIdx.x;
+      if ( loc < nblocks)
+      {
+         data = blockbucket[bucketId*nblocks +loc ] ;
+      }
+      __syncthreads();
+
+      // Collectively compute the block-wide exclusive prefix sum
+      BlockScan(temp_storage).ExclusiveSum( data, data, prefix_op);
+      __syncthreads();
+
+      if ( loc < nblocks)
+      {
+         blockbucket[bucketId*nblocks +loc ] = data ;
+      }
+      __syncthreads();
+
+      // threads past the end of the row must feed 0 into the next scan
+      data = 0;
+   }
+}
+
+
+// Sum val over the threads of a tile with shfl_down, halving the stride at each
+// step.  Only thread 0 of the tile ends up with the full sum.
+template< typename T, int tile_sz>
+__inline__ __device__ T warp_ReduceSumPlus( thread_block_tile<tile_sz> tile, T val)
+{
+    // Each iteration halves the number of active threads
+    // Each thread adds its partial sum[i] to sum[lane+i]
+    for (int i = tile.size() / 2; i > 0; i /= 2) {
+        val += tile.shfl_down( val, i);
+    }
+    return val; // note: only thread 0 will return full sum
+}
+
+// Sum val over all threads of threadblock g.  Each warp reduces its own values
+// and stores the partial sum in shared memory; the first warp then sums those
+// partial sums.  Only thread 0 of the block returns the full sum.
+template< typename T, int warpSize>
+__inline__ __device__ T block_ReduceSum(thread_block g, T val)
+{
+  static __shared__ T shared[warpSize]; // Shared mem for 32 partial sums
+  int lane = threadIdx.x % warpSize;
+  int wid = threadIdx.x / warpSize;
+  thread_block_tile<warpSize> tile = tiled_partition<warpSize>( g );
+
+  // Each warp performs partial reduction
+  val = warp_ReduceSumPlus( tile, val);
+
+  if (lane==0) {
+     shared[wid]=val; // Write reduced value to shared memory
+  }
+  __syncthreads(); // Wait for all partial reductions
+
+  if (wid > 0 ) return val ;
+
+  // Final reduce within first warp.  The per-warp sums have to be read back
+  // from shared memory first: lane w picks up the sum stored by warp w, and
+  // lanes with no matching warp contribute zero.
+  int nwarps = (blockDim.x + warpSize - 1) / warpSize ;
+  val = (lane < nwarps) ? shared[lane] : (T) 0 ;
+  val = warp_ReduceSumPlus( tile, val) ;
+
+  return val;
+}
+
+// GB_AxB_cuda_dot3_phase2 is a CUDA kernel that takes as input the blockbucket
+// array computed by the first phase kernel, GB_AxB_cuda_dot3_phase1.  The
+// launch geometry of this kernel must match the GB_AxB_cuda_dot3_phase1 kernel,
+// with the same # of threads and threadblocks.
+
+__global__ void AxB_phase2
+(
+    // input/output:
+    int64_t *__restrict__ blockbucket,    // global bucket count, of size 12*nblocks;
+                                          // each row is replaced by its exclusive cumsum
+    // output:
+    int64_t *__restrict__ offset,         // global offsets, for each bucket; the totals are
+                                          // atomically added to whatever offset already holds
+    // inputs, not modified:
+    const int nblocks               // input number of blocks to reduce
+)
+{
+
+    //--------------------------------------------------------------------------
+    // sum up the bucket counts of prior threadblocks
+    //--------------------------------------------------------------------------
+
+    // blockbucket is an array of size 12-by-nblocks, held by row.  The
+    // entry blockbucket [bucket * nblocks + t] holds the # of entries
+    // in the bucket (in range 0 to 11) found by threadblock t.
+
+    uint64_t s_0=0;
+    uint64_t s_1=0;
+    uint64_t s_2=0;
+    uint64_t s_3=0;
+    uint64_t s_4=0;
+    uint64_t s_5=0;
+    uint64_t s_6=0;
+    uint64_t s_7=0;
+    uint64_t s_8=0;
+    uint64_t s_9=0;
+    uint64_t s_10=0;
+    uint64_t s_11=0;
+
+    thread_block_tile<32> tile = tiled_partition<32>(this_thread_block() );
+
+    int tid = threadIdx.x + blockIdx.x * blockDim.x;
+    #define reduceBucket( B )    \
+     for( tid = threadIdx.x + blockIdx.x*blockDim.x; \
+          tid < nblocks;                  \
+          tid += blockDim.x*gridDim.x)   \
+     {                                   \
+         s_ ## B  += blockbucket[  B *nblocks +tid] ;  \
+     } \
+     __syncthreads(); \
+     s_ ## B  = warp_ReduceSumPlus( tile, s_ ## B);
+
+    reduceBucket( 0 )
+    reduceBucket( 1 )
+    reduceBucket( 2 )
+    reduceBucket( 3 )
+    reduceBucket( 4 )
+    reduceBucket( 5 )
+    reduceBucket( 6 )
+    reduceBucket( 7 )
+    reduceBucket( 8 )
+    reduceBucket( 9 )
+    reduceBucket( 10 )
+    reduceBucket( 11 )
+
+    // Only thread 0 publishes, so only the first warp's sum is counted.
+    // NOTE(review): that is complete only when blockDim.x <= 32 -- confirm
+    // against the launch code.
+    if (threadIdx.x ==0 )
+    {
+       atomicAdd( (unsigned long long int*)&(offset[0]), s_0);
+       atomicAdd( (unsigned long long int*)&(offset[1]), s_1);
+       atomicAdd( (unsigned long long int*)&(offset[2]), s_2);
+       atomicAdd( (unsigned long long int*)&(offset[3]), s_3);
+       atomicAdd( (unsigned long long int*)&(offset[4]), s_4);
+       atomicAdd( (unsigned long long int*)&(offset[5]), s_5);
+       atomicAdd( (unsigned long long int*)&(offset[6]), s_6);
+       atomicAdd( (unsigned long long int*)&(offset[7]), s_7);
+       atomicAdd( (unsigned long long int*)&(offset[8]), s_8);
+       atomicAdd( (unsigned long long int*)&(offset[9]), s_9);
+       atomicAdd( (unsigned long long int*)&(offset[10]),s_10);
+       atomicAdd( (unsigned long long int*)&(offset[11]),s_11);
+    }
+    __syncthreads();
+
+    // NOTE(review): the scans below overwrite blockbucket in place while other
+    // threadblocks may still be reading it in reduceBucket above; there is no
+    // grid-wide barrier between the two steps -- confirm this cannot race.
+    if( gridDim.x >= 12)
+    {
+        // Cumulative sum across blocks for each bucket
+        if (blockIdx.x <12)
+            blockBucketExclusiveSum( blockIdx.x, blockbucket, nblocks ) ;
+    }
+    else
+    {
+        if (blockIdx.x == 0)
+        {
+            blockBucketExclusiveSum( 0, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 1, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 2, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 3, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 4, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 5, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 6, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 7, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 8, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 9, blockbucket, nblocks ) ;
+            blockBucketExclusiveSum( 10, blockbucket, nblocks) ;
+            blockBucketExclusiveSum( 11, blockbucket, nblocks) ;
+        }
+    }
+} // phase2
diff --git a/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2end.cu b/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2end.cuh
similarity index 99%
rename from GraphBLAS/CUDA/templates/GB_jit_AxB_phase2end.cu
rename to GraphBLAS/CUDA/templates/GB_jit_AxB_phase2end.cuh
index fd1a449d6a..73f2232539 100644
--- a/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2end.cu
+++ b/GraphBLAS/CUDA/templates/GB_jit_AxB_phase2end.cuh
@@ -3,15 +3,14 @@
 //------------------------------------------------------------------------------
 
 // TODO describe me
+#pragma once
 
 #define GB_CUDA_KERNEL
-
-//#include
 
 #include "GB_cuda_buckets.h"
 #include "matrix.h"
 #include
-#include "local_cub/block/block_scan.cuh"
+#include
 
 using namespace cooperative_groups;
 
diff --git a/GraphBLAS/CUDA/templates/GB_jit_atomics.cuh b/GraphBLAS/CUDA/templates/GB_jit_atomics.cuh
new file mode 100644
index 0000000000..2894296987
--- /dev/null
+++ b/GraphBLAS/CUDA/templates/GB_jit_atomics.cuh
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/**
+ * Specializations for different atomic operations on different types
+ */
+
+#pragma once
+
+// TODO: These should really be pre-compiled into the graphblascuda binary
+
+template <typename T>  // atomically performs *ptr += val
+__device__ void atomic_add(T* ptr, T val);
+
+template<> __device__ __inline__ void atomic_add<int>(int* ptr, int val) { atomicAdd(ptr, val); }
+template<> __device__ __inline__ void atomic_add<int64_t>(int64_t* ptr, int64_t val) { atomicAdd((unsigned long long*)ptr, (unsigned long long)val); }
+template<> __device__ __inline__ void atomic_add<float>(float* ptr, float val) { atomicAdd(ptr, val); }
+template<> __device__ __inline__ void atomic_add<double>(double* ptr, double val) { atomicAdd(ptr, val); }
+// NOTE: int64_t add can reuse the unsigned overload (two's-complement wraparound), but max/min compare
+// values, so they must call the signed long long overloads or a negative value is treated as a huge one.
+template <typename T>  // atomically performs *ptr = max(*ptr, val)
+__device__ void atomic_max(T* ptr, T val);
+
+template<> __device__ __inline__ void atomic_max<int>(int* ptr, int val) { atomicMax(ptr, val); }
+template<> __device__ __inline__ void atomic_max<int64_t>(int64_t* ptr, int64_t val) { atomicMax((long long*)ptr, (long long)val); }
+
+template <typename T>  // atomically performs *ptr = min(*ptr, val)
+__device__ void atomic_min(T* ptr, T val);
+
+template<> __device__ __inline__ void atomic_min<int>(int* ptr, int val) { atomicMin(ptr, val); }
+template<> __device__ __inline__ void atomic_min<int64_t>(int64_t* ptr, int64_t val) { atomicMin((long long*)ptr, (long long)val); }
+
+// atomically performs *ptr -= val; only the int specialization is provided
+template <typename T>
+__device__ void atomic_sub(T* ptr, T val);
+
+template<> __device__ __inline__ void atomic_sub<int>(int* ptr, int val) { atomicSub(ptr, val); }
diff --git a/GraphBLAS/CUDA/templates/reduceNonZombiesWarp.cu b/GraphBLAS/CUDA/templates/GB_jit_reduceNonZombiesWarp.cuh
similarity index 80%
rename from GraphBLAS/CUDA/templates/reduceNonZombiesWarp.cu
rename to GraphBLAS/CUDA/templates/GB_jit_reduceNonZombiesWarp.cuh
index 5be75e6730..2264e9f13b 100644
--- a/GraphBLAS/CUDA/templates/reduceNonZombiesWarp.cu
+++ b/GraphBLAS/CUDA/templates/GB_jit_reduceNonZombiesWarp.cuh
@@ -17,9 +17,16 @@
 
 #define GB_CUDA_KERNEL
 #include
+#include
+#include "matrix.h"
+#include "GB_cuda_atomics.cuh"
 #include
 #include
 
+// TODO: Temporary
+#define GB_IDENTITY 0
+#define GB_ADD(a, b) ((a) + (b))
+
 using namespace cooperative_groups;
 
 template< typename T, int tile_sz>
@@ -37,6 +44,7 @@ T warp_ReduceSum( thread_block_tile g, T val)
     return val; // note: only thread 0 will return full sum
 }
 
+
 template
 __inline__ __device__
 T block_ReduceSum(thread_block g, T val)
@@ -68,29 +76,38 @@ T block_ReduceSum(thread_block g, T val)
     return val;
 }
 
-template< typename T>
+
+// reduceNonZombiesWarp: combine the entries of A->x with GB_ADD into O->x.  When
+// is_sparse is true, entries whose index A->i [k] is negative (zombies) are skipped.
+// With atomic_reduce, each threadblock atomically adds its result to O->x [0];
+// otherwise threadblock b writes its result to O->x [b].
+template< typename T, typename Accum, bool atomic_reduce = true>
 __global__ void reduceNonZombiesWarp
 (
-    int64_t *index, // array of size n
-    T *g_idata, // array of size n
-    T *g_odata, // array of size grid.x
-    unsigned int N
+    GrB_Matrix A,
+    GrB_Scalar O, // array of size grid.x if atomic_reduce==false and size 1 if atomic_reduce==true
+    int64_t N, // number of edges for sparse, size of x array for full/bitmap
+    bool is_sparse
 )
 {
     // set thread ID
     int tid = threadIdx.x ;
 
+    int64_t *index = A->i;
+    T *g_idata = (T*) A->x;
+    Accum *g_odata = (Accum*) O->x;
+
     // each thread tid reduces its result into sum
-    T sum = (T) GB_IDENTITY;
+    Accum sum = (Accum) GB_IDENTITY;
 
-    for(int i = blockIdx.x * blockDim.x + threadIdx.x;
-        i < N;
+    // N is int64_t, so the index must be 64-bit as well: an int would overflow once N exceeds INT_MAX
+    for(int64_t i = (int64_t) blockIdx.x * blockDim.x + threadIdx.x;
+        i < N;
         i += blockDim.x * gridDim.x) {
-        if ( index[i] < 0) continue;
+        if (is_sparse && index[i] < 0) continue; // skip zombies
         T fold = g_idata[i];
         sum = GB_ADD( sum, fold );
     }
-    //printf("thd%d sum is %d\n", threadIdx.x + blockDim.x*blockIdx.x, sum);
     __syncthreads();
     //--------------------------------------------------------------------------
     // reduce work [0..s-1] to a single scalar
@@ -101,7 +118,12 @@ __global__ void reduceNonZombiesWarp
     // write result for this block to global mem
     if (tid == 0)
     {
-        g_odata [blockIdx.x] = sum ;
+        // TODO: Assuming sum for now (like the rest of the kernel)
+        if(atomic_reduce) {
+            atomic_add(g_odata, sum);
+        } else {
+            g_odata [blockIdx.x] = sum ;
+        }
     }
 }
 
diff --git a/GraphBLAS/CUDA/templates/reduceUnrolled.cu b/GraphBLAS/CUDA/templates/GB_jit_reduceUnrolled.cu
similarity index 100%
rename
from GraphBLAS/CUDA/templates/reduceUnrolled.cu rename to GraphBLAS/CUDA/templates/GB_jit_reduceUnrolled.cu diff --git a/GraphBLAS/CUDA/templates/cooperative_groups.h b/GraphBLAS/CUDA/templates/cooperative_groups.h deleted file mode 100755 index 1f296729e5..0000000000 --- a/GraphBLAS/CUDA/templates/cooperative_groups.h +++ /dev/null @@ -1,996 +0,0 @@ -/* - * Copyright 1993-2016 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -#ifndef _COOPERATIVE_GROUPS_H_ -# define _COOPERATIVE_GROUPS_H_ - -#if defined(__cplusplus) && defined(__CUDACC__) - -# include "cooperative_groups_helpers.h" - -_CG_BEGIN_NAMESPACE - -/** - * class thread_group; - * - * Generic thread group type, into which all groups are convertible. - * It acts as a container for all storage necessary for the derived groups, - * and will dispatch the API calls to the correct derived group. This means - * that all derived groups must implement the same interface as thread_group. 
- */ -class thread_group -{ - friend _CG_QUALIFIER thread_group this_thread(); - friend _CG_QUALIFIER thread_group tiled_partition(const thread_group& parent, unsigned int tilesz); - friend class thread_block; - - protected: - union __align__(8) { - unsigned int type : 8; - struct { - unsigned int type : 8; - unsigned int size : 24; - unsigned int mask; - } coalesced; - struct { - void* ptr[2]; - } buffer; - } _data; - - _CG_QUALIFIER thread_group operator=(const thread_group& src); - _CG_QUALIFIER thread_group(__internal::groupType type) { - _data.type = type; - } - -#if __cplusplus >= 201103L - static_assert(sizeof(_data) == 16, "Failed size check"); -#endif - -public: - _CG_QUALIFIER unsigned int size() const; - _CG_QUALIFIER unsigned int thread_rank() const; - _CG_QUALIFIER void sync() const; -}; - -/** - * thread_group this_thread() - * - * Constructs a generic thread_group containing only the calling thread - */ -_CG_QUALIFIER thread_group this_thread() -{ - thread_group g = thread_group(__internal::Coalesced); - g._data.coalesced.mask = __internal::lanemask32_eq(); - g._data.coalesced.size = 1; - return (g); -} - -#if defined(_CG_HAS_MULTI_GRID_GROUP) - -/** - * class multi_grid_group; - * - * Threads within this this group are guaranteed to be co-resident on the - * same system, on multiple devices within the same launched kernels. - * To use this group, the kernel must have been launched with - * cuLaunchCooperativeKernelMultiDevice (or the CUDA Runtime equivalent), - * and the device must support it (queryable device attribute). 
- * - * Constructed via this_multi_grid(); - */ -class multi_grid_group -{ - friend _CG_QUALIFIER multi_grid_group this_multi_grid(); - - struct __align__(8) { - unsigned long long handle; - unsigned int size; - unsigned int rank; - } _data; - -#if __cplusplus >= 201103L - static_assert(sizeof(_data) == 16, "Failed size check"); -#endif - -public: - _CG_QUALIFIER multi_grid_group() { - _data.handle = __internal::multi_grid::get_intrinsic_handle(); - _data.size = __internal::multi_grid::size(_data.handle); - _data.rank = __internal::multi_grid::thread_rank(_data.handle); - } - - _CG_QUALIFIER bool is_valid() const { - return (_data.handle != 0); - } - - _CG_QUALIFIER void sync() const { - _CG_ASSERT(is_valid()); - __internal::multi_grid::sync(_data.handle); - } - - _CG_QUALIFIER unsigned int size() const { - _CG_ASSERT(is_valid()); - return (_data.size); - } - - _CG_QUALIFIER unsigned int thread_rank() const { - _CG_ASSERT(is_valid()); - return (_data.rank); - } - - _CG_QUALIFIER unsigned int grid_rank() const { - _CG_ASSERT(is_valid()); - return (__internal::multi_grid::grid_rank(_data.handle)); - } - - _CG_QUALIFIER unsigned int num_grids() const { - _CG_ASSERT(is_valid()); - return (__internal::multi_grid::num_grids(_data.handle)); - } -}; - -/** - * multi_grid_group this_multi_grid() - * - * Constructs a multi_grid_group - */ -_CG_QUALIFIER multi_grid_group this_multi_grid() -{ - return (multi_grid_group()); -} - -#endif - -#if defined(_CG_HAS_GRID_GROUP) - -/** - * class grid_group; - * - * Threads within this this group are guaranteed to be co-resident on the - * same device within the same launched kernel. To use this group, the kernel - * must have been launched with cuLaunchCooperativeKernel (or the CUDA Runtime equivalent), - * and the device must support it (queryable device attribute). 
- * - * Constructed via this_grid(); - */ -class grid_group -{ - friend _CG_QUALIFIER grid_group this_grid(); - - struct __align__(8) { - unsigned long long handle; - unsigned int size; - unsigned int rank; - } _data; - -#if __cplusplus >= 201103L - static_assert(sizeof(_data) == 16, "Failed size check"); -#endif - - public: - _CG_QUALIFIER grid_group() { - _data.handle = (__internal::grid::get_intrinsic_handle()); - _data.size = __internal::grid::size(_data.handle); - _data.rank = __internal::grid::thread_rank(_data.handle); - } - - _CG_QUALIFIER bool is_valid() const { - return (_data.handle != 0); - } - - _CG_QUALIFIER void sync() const { - _CG_ASSERT(is_valid()); - __internal::grid::sync(_data.handle); - } - - _CG_QUALIFIER unsigned int size() const { - _CG_ASSERT(is_valid()); - return (_data.size); - } - - _CG_QUALIFIER unsigned int thread_rank() const { - _CG_ASSERT(is_valid()); - return (_data.rank); - } - - _CG_QUALIFIER dim3 group_dim() const { - _CG_ASSERT(is_valid()); - return (__internal::grid::grid_dim()); - } - -}; - -/** - * grid_group this_grid() - * - * Constructs a grid_group - */ -_CG_QUALIFIER grid_group this_grid() -{ - return (grid_group()); -} - -#endif - -/** - * class thread_block - * - * Every GPU kernel is executed by a grid of thread blocks, and threads within - * each block are guaranteed to reside on the same streaming multiprocessor. - * A thread_block represents a thread block whose dimensions are not known until runtime. 
- * - * Constructed via this_thread_block(); - */ -class thread_block : public thread_group -{ - friend _CG_QUALIFIER thread_block this_thread_block(); - friend _CG_QUALIFIER thread_group tiled_partition(const thread_group& parent, unsigned int tilesz); - friend _CG_QUALIFIER thread_group tiled_partition(const thread_block& parent, unsigned int tilesz); - - _CG_QUALIFIER thread_block() : thread_group(__internal::ThreadBlock) { - } - - // Internal Use - _CG_QUALIFIER thread_group _get_tiled_threads(unsigned int tilesz) const { - const bool pow2_tilesz = ((tilesz & (tilesz - 1)) == 0); - - // Invalid, immediately fail - if (tilesz == 0 || (tilesz > 32) || !pow2_tilesz) { - __internal::abort(); - return (thread_block()); - } - - unsigned int mask; - unsigned int base_offset = thread_rank() & (~(tilesz - 1)); - unsigned int masklength = min(size() - base_offset, tilesz); - - mask = (unsigned int)(-1) >> (32 - masklength); - mask <<= (__internal::laneid() & ~(tilesz - 1)); - thread_group tile = thread_group(__internal::CoalescedTile); - tile._data.coalesced.mask = mask; - tile._data.coalesced.size = __popc(mask); - return (tile); - } - - public: - _CG_QUALIFIER void sync() const { - __internal::cta::sync(); - } - - _CG_QUALIFIER unsigned int size() const { - return (__internal::cta::size()); - } - - _CG_QUALIFIER unsigned int thread_rank() const { - return (__internal::cta::thread_rank()); - } - - // Additional functionality exposed by the group - _CG_QUALIFIER dim3 group_index() const { - return (__internal::cta::group_index()); - } - - _CG_QUALIFIER dim3 thread_index() const { - return (__internal::cta::thread_index()); - } - - _CG_QUALIFIER dim3 group_dim() const { - return (__internal::cta::block_dim()); - } - -}; - -/** - * thread_block this_thread_block() - * - * Constructs a thread_block group - */ -_CG_QUALIFIER thread_block this_thread_block() -{ - return (thread_block()); -} - -/** - * class coalesced_group - * - * A group representing the current set of 
converged threads in a warp. - * The size of the group is not guaranteed and it may return a group of - * only one thread (itself). - * - * This group exposes warp-synchronous builtins. - * Constructed via coalesced_threads(); - */ -class coalesced_group : public thread_group -{ - friend _CG_QUALIFIER coalesced_group coalesced_threads(); - friend _CG_QUALIFIER thread_group tiled_partition(const thread_group& parent, unsigned int tilesz); - friend _CG_QUALIFIER coalesced_group tiled_partition(const coalesced_group& parent, unsigned int tilesz); - - _CG_QUALIFIER unsigned int _packLanes(unsigned laneMask) const { - unsigned int member_pack = 0; - unsigned int member_rank = 0; - for (int bit_idx = 0; bit_idx < 32; bit_idx++) { - unsigned int lane_bit = _data.coalesced.mask & (1 << bit_idx); - if (lane_bit) { - if (laneMask & lane_bit) - member_pack |= 1 << member_rank; - member_rank++; - } - } - return (member_pack); - } - - // Internal Use - _CG_QUALIFIER coalesced_group _get_tiled_threads(unsigned int tilesz) const { - const bool pow2_tilesz = ((tilesz & (tilesz - 1)) == 0); - - // Invalid, immediately fail - if (tilesz == 0 || (tilesz > 32) || !pow2_tilesz) { - __internal::abort(); - return (coalesced_group(0)); - } - if (size() <= tilesz) { - return (*this); - } - - if ((_data.type == __internal::CoalescedTile) && pow2_tilesz) { - unsigned int base_offset = (thread_rank() & (~(tilesz - 1))); - unsigned int masklength = min(size() - base_offset, tilesz); - unsigned int mask = (unsigned int)(-1) >> (32 - masklength); - - mask <<= (__internal::laneid() & ~(tilesz - 1)); - coalesced_group coalesced_tile = coalesced_group(mask); - coalesced_tile._data.type = __internal::CoalescedTile; - return (coalesced_tile); - } - else if ((_data.type == __internal::Coalesced) && pow2_tilesz) { - unsigned int mask = 0; - unsigned int member_rank = 0; - int seen_lanes = (thread_rank() / tilesz) * tilesz; - for (unsigned int bit_idx = 0; bit_idx < 32; bit_idx++) { - unsigned int 
lane_bit = _data.coalesced.mask & (1 << bit_idx); - if (lane_bit) { - if (seen_lanes <= 0 && member_rank < tilesz) { - mask |= lane_bit; - member_rank++; - } - seen_lanes--; - } - } - return (coalesced_group(mask)); - } - else { - // None in _CG_VERSION 1000 - __internal::abort(); - } - - return (coalesced_group(0)); - } - - protected: - // Construct a group from scratch (coalesced_threads) - _CG_QUALIFIER coalesced_group(unsigned int mask) : thread_group(__internal::Coalesced) { - _data.coalesced.mask = mask; - _data.coalesced.size = __popc(mask); - } - - public: - _CG_QUALIFIER unsigned int size() const { - return (_data.coalesced.size); - } - _CG_QUALIFIER unsigned int thread_rank() const { - return (__popc(_data.coalesced.mask & __internal::lanemask32_lt())); - } - _CG_QUALIFIER void sync() const { - __syncwarp(_data.coalesced.mask); - } - -#define COALESCED_SHFL_FUNCTION(type) \ - _CG_QUALIFIER type shfl(type var, unsigned int src_rank) const { \ - unsigned int lane = (src_rank == 0) ? __ffs(_data.coalesced.mask) - 1 : \ - (size() == 32) ? 
src_rank : __fns(_data.coalesced.mask, 0, (src_rank + 1)); \ - return (__shfl_sync(_data.coalesced.mask, var, lane, 32)); \ - } - -#define COALESCED_SHFL_UP_FUNCTION(type) \ - _CG_QUALIFIER type shfl_up(type var, int delta) const { \ - if (size() == 32) { \ - return (__shfl_up_sync(0xFFFFFFFF, var, delta, 32)); \ - } \ - unsigned lane = __fns(_data.coalesced.mask, __internal::laneid(), -(delta + 1)); \ - if (lane >= 32) lane = __internal::laneid(); \ - return (__shfl_sync(_data.coalesced.mask, var, lane, 32)); \ - } - -#define COALESCED_SHFL_DOWN_FUNCTION(type) \ - _CG_QUALIFIER type shfl_down(type var, int delta) const { \ - if (size() == 32) { \ - return (__shfl_down_sync(0xFFFFFFFF, var, delta, 32)); \ - } \ - unsigned int lane = __fns(_data.coalesced.mask, __internal::laneid(), delta + 1); \ - if (lane >= 32) lane = __internal::laneid(); \ - return (__shfl_sync(_data.coalesced.mask, var, lane, 32)); \ - } - - COALESCED_SHFL_FUNCTION(int); - COALESCED_SHFL_FUNCTION(unsigned int); - COALESCED_SHFL_FUNCTION(long); - COALESCED_SHFL_FUNCTION(unsigned long); - COALESCED_SHFL_FUNCTION(long long); - COALESCED_SHFL_FUNCTION(unsigned long long); - COALESCED_SHFL_FUNCTION(float); - COALESCED_SHFL_FUNCTION(double); - - COALESCED_SHFL_UP_FUNCTION(int); - COALESCED_SHFL_UP_FUNCTION(unsigned int); - COALESCED_SHFL_UP_FUNCTION(long); - COALESCED_SHFL_UP_FUNCTION(unsigned long); - COALESCED_SHFL_UP_FUNCTION(long long); - COALESCED_SHFL_UP_FUNCTION(unsigned long long); - COALESCED_SHFL_UP_FUNCTION(float); - COALESCED_SHFL_UP_FUNCTION(double); - - COALESCED_SHFL_DOWN_FUNCTION(int); - COALESCED_SHFL_DOWN_FUNCTION(unsigned int); - COALESCED_SHFL_DOWN_FUNCTION(long); - COALESCED_SHFL_DOWN_FUNCTION(unsigned long); - COALESCED_SHFL_DOWN_FUNCTION(long long); - COALESCED_SHFL_DOWN_FUNCTION(unsigned long long); - COALESCED_SHFL_DOWN_FUNCTION(float); - COALESCED_SHFL_DOWN_FUNCTION(double); - -# ifdef _CG_HAS_FP16_COLLECTIVE - COALESCED_SHFL_FUNCTION(__half); - 
COALESCED_SHFL_UP_FUNCTION(__half); - COALESCED_SHFL_DOWN_FUNCTION(__half); - - COALESCED_SHFL_FUNCTION(__half2); - COALESCED_SHFL_UP_FUNCTION(__half2); - COALESCED_SHFL_DOWN_FUNCTION(__half2); -# endif - -#undef COALESCED_SHFL_FUNCTION -#undef COALESCED_SHFL_UP_FUNCTION -#undef COALESCED_SHFL_DOWN_FUNCTION - - _CG_QUALIFIER int any(int predicate) const { - return (__ballot_sync(_data.coalesced.mask, predicate) != 0); - } - _CG_QUALIFIER int all(int predicate) const { - return (__ballot_sync(_data.coalesced.mask, predicate) == _data.coalesced.mask); - } - _CG_QUALIFIER unsigned int ballot(int predicate) const { - if (size() == 32) { - return (__ballot_sync(0xFFFFFFFF, predicate)); - } - unsigned int lane_ballot = __ballot_sync(_data.coalesced.mask, predicate); - return (_packLanes(lane_ballot)); - } - -#ifdef _CG_HAS_MATCH_COLLECTIVE - -# define COALESCED_MATCH_ANY_FUNCTION(type) \ - _CG_QUALIFIER unsigned int match_any(type val) const { \ - if (size() == 32) { \ - return (__match_any_sync(0xFFFFFFFF, val)); \ - } \ - unsigned int lane_match = __match_any_sync(_data.coalesced.mask, val); \ - return (_packLanes(lane_match)); \ - } -# define COALESCED_MATCH_ALL_FUNCTION(type) \ - _CG_QUALIFIER unsigned int match_all(type val, int &pred) const { \ - if (size() == 32) { \ - return (__match_all_sync(0xFFFFFFFF, val, &pred)); \ - } \ - unsigned int lane_match = __match_all_sync(_data.coalesced.mask, val, &pred); \ - return (_packLanes(lane_match)); \ - } - - COALESCED_MATCH_ANY_FUNCTION(int); - COALESCED_MATCH_ANY_FUNCTION(unsigned int); - COALESCED_MATCH_ANY_FUNCTION(long); - COALESCED_MATCH_ANY_FUNCTION(unsigned long); - COALESCED_MATCH_ANY_FUNCTION(long long); - COALESCED_MATCH_ANY_FUNCTION(unsigned long long); - COALESCED_MATCH_ANY_FUNCTION(float); - COALESCED_MATCH_ANY_FUNCTION(double); - - COALESCED_MATCH_ALL_FUNCTION(int); - COALESCED_MATCH_ALL_FUNCTION(unsigned int); - COALESCED_MATCH_ALL_FUNCTION(long); - COALESCED_MATCH_ALL_FUNCTION(unsigned long); - 
COALESCED_MATCH_ALL_FUNCTION(long long); - COALESCED_MATCH_ALL_FUNCTION(unsigned long long); - COALESCED_MATCH_ALL_FUNCTION(float); - COALESCED_MATCH_ALL_FUNCTION(double); - -# undef COALESCED_MATCH_ANY_FUNCTION -# undef COALESCED_MATCH_ALL_FUNCTION - -#endif /* !_CG_HAS_MATCH_COLLECTIVE */ - -}; - -_CG_QUALIFIER coalesced_group coalesced_threads() -{ - return (coalesced_group(__activemask())); -} - -template -class __thread_block_tile_base : public thread_group -{ - static const unsigned int numThreads = Size; - - _CG_QUALIFIER unsigned int build_mask() const { - unsigned int mask; - - if (numThreads == 32) { - mask = 0xFFFFFFFF; - } - else { - mask = (unsigned int)(-1) >> (32 - numThreads); - mask <<= (__internal::laneid() & (~(numThreads - 1))); - } - return (mask); - } - - protected: - _CG_QUALIFIER __thread_block_tile_base() : thread_group(__internal::CoalescedTile) { - _data.coalesced.mask = build_mask(); - _data.coalesced.size = numThreads; - } - - public: - _CG_QUALIFIER void sync() const { - __syncwarp(build_mask()); - } - _CG_QUALIFIER unsigned int thread_rank() const { - return (__internal::laneid() & (numThreads - 1)); - } - _CG_QUALIFIER unsigned int size() const { - return (numThreads); - } - - // PTX supported collectives - _CG_QUALIFIER int shfl(int var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER int shfl_down(int var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER int shfl_up(int var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER int shfl_xor(int var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER unsigned int shfl(unsigned int var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER unsigned int shfl_down(unsigned int var, 
unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER unsigned int shfl_up(unsigned int var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER unsigned int shfl_xor(unsigned int var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER long shfl(long var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER long shfl_down(long var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER long shfl_up(long var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER long shfl_xor(long var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER unsigned long shfl(unsigned long var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER unsigned long shfl_down(unsigned long var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER unsigned long shfl_up(unsigned long var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER unsigned long shfl_xor(unsigned long var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER long long shfl(long long var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER long long shfl_down(long long var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER long long shfl_up(long long var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); 
- } - _CG_QUALIFIER long long shfl_xor(long long var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER unsigned long long shfl(unsigned long long var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER unsigned long long shfl_down(unsigned long long var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER unsigned long long shfl_up(unsigned long long var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER unsigned long long shfl_xor(unsigned long long var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER float shfl(float var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER float shfl_down(float var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER float shfl_up(float var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER float shfl_xor(float var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER double shfl(double var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER double shfl_down(double var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER double shfl_up(double var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER double shfl_xor(double var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER int any(int predicate) const { - unsigned int 
lane_ballot = build_mask() & __ballot_sync(build_mask(), predicate); - return (lane_ballot != 0); - } - _CG_QUALIFIER int all(int predicate) const { - unsigned int lane_ballot = build_mask() & __ballot_sync(build_mask(), predicate); - return (lane_ballot == build_mask()); - } - _CG_QUALIFIER unsigned int ballot(int predicate) const { - unsigned int lane_ballot = build_mask() & __ballot_sync(build_mask(), predicate); - return (lane_ballot >> (__internal::laneid() & (~(numThreads - 1)))); - } - -#ifdef _CG_HAS_FP16_COLLECTIVE - _CG_QUALIFIER __half shfl(__half var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER __half shfl_down(__half var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER __half shfl_up(__half var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER __half shfl_xor(__half var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } - _CG_QUALIFIER __half2 shfl(__half2 var, int srcRank) const { - return (__shfl_sync(build_mask(), var, srcRank, numThreads)); - } - _CG_QUALIFIER __half2 shfl_down(__half2 var, unsigned int delta) const { - return (__shfl_down_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER __half2 shfl_up(__half2 var, unsigned int delta) const { - return (__shfl_up_sync(build_mask(), var, delta, numThreads)); - } - _CG_QUALIFIER __half2 shfl_xor(__half2 var, unsigned int laneMask) const { - return (__shfl_xor_sync(build_mask(), var, laneMask, numThreads)); - } -#endif - -#ifdef _CG_HAS_MATCH_COLLECTIVE - _CG_QUALIFIER unsigned int match_any(int val) const { - unsigned int lane_match = build_mask() & __match_any_sync(build_mask(), val); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_any(unsigned int val) const { - unsigned int 
lane_match = build_mask() & __match_any_sync(build_mask(), val); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_any(long val) const { - unsigned int lane_match = build_mask() & __match_any_sync(build_mask(), val); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_any(unsigned long val) const { - unsigned int lane_match = build_mask() & __match_any_sync(build_mask(), val); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_any(long long val) const { - unsigned int lane_match = build_mask() & __match_any_sync(build_mask(), val); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_any(unsigned long long val) const { - unsigned int lane_match = build_mask() & __match_any_sync(build_mask(), val); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_any(float val) const { - unsigned int lane_match = build_mask() & __match_any_sync(build_mask(), val); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_any(double val) const { - unsigned int lane_match = build_mask() & __match_any_sync(build_mask(), val); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - - _CG_QUALIFIER unsigned int match_all(int val, int &pred) const { - unsigned int lane_match = build_mask() & __match_all_sync(build_mask(), val, &pred); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_all(unsigned int val, int &pred) const { - unsigned int lane_match = build_mask() & __match_all_sync(build_mask(), val, &pred); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_all(long val, int &pred) const { - unsigned int 
lane_match = build_mask() & __match_all_sync(build_mask(), val, &pred); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_all(unsigned long val, int &pred) const { - unsigned int lane_match = build_mask() & __match_all_sync(build_mask(), val, &pred); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_all(long long val, int &pred) const { - unsigned int lane_match = build_mask() & __match_all_sync(build_mask(), val, &pred); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_all(unsigned long long val, int &pred) const { - unsigned int lane_match = build_mask() & __match_all_sync(build_mask(), val, &pred); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_all(float val, int &pred) const { - unsigned int lane_match = build_mask() & __match_all_sync(build_mask(), val, &pred); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } - _CG_QUALIFIER unsigned int match_all(double val, int &pred) const { - unsigned int lane_match = build_mask() & __match_all_sync(build_mask(), val, &pred); - return (lane_match >> (__internal::laneid() & (~(numThreads - 1)))); - } -#endif - -}; - -/** - * class thread_block_tile - * - * Statically-sized group type, representing one tile of a thread block. - * The only specializations currently supported are those with native - * hardware support (1/2/4/8/16/32) - * - * This group exposes warp-synchronous builtins. 
- * Constructed via tiled_partition(class thread_block); - */ -template -class thread_block_tile; -template <> class thread_block_tile<32> : public __thread_block_tile_base<32> { }; -template <> class thread_block_tile<16> : public __thread_block_tile_base<16> { }; -template <> class thread_block_tile<8> : public __thread_block_tile_base<8> { }; -template <> class thread_block_tile<4> : public __thread_block_tile_base<4> { }; -template <> class thread_block_tile<2> : public __thread_block_tile_base<2> { }; -template <> class thread_block_tile<1> : public __thread_block_tile_base<1> { }; - -/** - * Outer level API calls - * void sync(GroupT) - see .sync() - * void thread_rank(GroupT) - see .thread_rank() - * void group_size(GroupT) - see .size() - */ -template _CG_QUALIFIER void sync(GroupT const &g) -{ - g.sync(); -} - -template _CG_QUALIFIER unsigned int thread_rank(GroupT const& g) -{ - return (g.thread_rank()); -} - -template _CG_QUALIFIER unsigned int group_size(GroupT const &g) -{ - return (g.size()); -} - -/** - * .sync() - * - * Executes a barrier across the group - * - * Implements both a compiler fence and an architectural fence to prevent, - * memory reordering around the barrier. - */ -_CG_QUALIFIER void thread_group::sync() const -{ - if (_data.type == __internal::Coalesced || _data.type == __internal::CoalescedTile) { - static_cast(this)->sync(); - } - else { - static_cast(this)->sync(); - } -} - -/** - * .size() - * - * Returns the total number of threads in the group. - */ -_CG_QUALIFIER unsigned int thread_group::size() const -{ - if (_data.type == __internal::Coalesced || _data.type == __internal::CoalescedTile) { - return (static_cast(this)->size()); - } - else { - return (static_cast(this)->size()); - } -} - -/** - * .thread_rank() - * - * Returns the linearized rank of the calling thread along the interval [0, size()). 
- */ -_CG_QUALIFIER unsigned int thread_group::thread_rank() const -{ - if (_data.type == __internal::Coalesced || _data.type == __internal::CoalescedTile) { - return (static_cast(this)->thread_rank()); - } - else { - return (static_cast(this)->thread_rank()); - } -} - -/** - * tiled_partition - * - * The tiled_partition(parent, tilesz) method is a collective operation that - * partitions the parent group into a one-dimensional, row-major, tiling of subgroups. - * - * A total of ((size(parent)+tilesz-1)/tilesz) subgroups will - * be created where threads having identical k = (thread_rank(parent)/tilesz) - * will be members of the same subgroup. - * - * The implementation may cause the calling thread to wait until all the members - * of the parent group have invoked the operation before resuming execution. - * - * Functionality is limited to power-of-two sized subgorup instances of at most - * 32 threads. Only thread_block, thread_block_tile<>, and their subgroups can be - * tiled_partition() in _CG_VERSION 1000. 
- */ -_CG_QUALIFIER thread_group tiled_partition(const thread_group& parent, unsigned int tilesz) -{ - if (parent._data.type == __internal::Coalesced || parent._data.type == __internal::CoalescedTile) { - return (static_cast(parent)._get_tiled_threads(tilesz)); - } - else { - return (static_cast(parent)._get_tiled_threads(tilesz)); - } -} -// Thread block type overload: returns a basic thread_group for now (may be specialized later) -_CG_QUALIFIER thread_group tiled_partition(const thread_block& parent, unsigned int tilesz) -{ - return (parent._get_tiled_threads(tilesz)); -} -// Coalesced group type overload: retains its ability to stay coalesced -_CG_QUALIFIER coalesced_group tiled_partition(const coalesced_group& parent, unsigned int tilesz) -{ - return (parent._get_tiled_threads(tilesz)); -} - -namespace __internal { - - // For specializing on different tiled_partition template arguments - template - struct tiled_partition_impl; - - template - struct tiled_partition_impl : public thread_block_tile { - _CG_QUALIFIER tiled_partition_impl(thread_block const &) : thread_block_tile() {} - }; - template - struct tiled_partition_impl > : public thread_block_tile { - _CG_QUALIFIER tiled_partition_impl(thread_block_tile<32> const&) : thread_block_tile() {} - }; - template - struct tiled_partition_impl > : public thread_block_tile { - _CG_QUALIFIER tiled_partition_impl(thread_block_tile<16> const&) : thread_block_tile() {} - }; - template - struct tiled_partition_impl > : public thread_block_tile { - _CG_QUALIFIER tiled_partition_impl(thread_block_tile<8> const&) : thread_block_tile() {} - }; - template - struct tiled_partition_impl > : public thread_block_tile { - _CG_QUALIFIER tiled_partition_impl(thread_block_tile<4> const&) : thread_block_tile() {} - }; - template - struct tiled_partition_impl > : public thread_block_tile { - _CG_QUALIFIER tiled_partition_impl(thread_block_tile<2> const&) : thread_block_tile() {} - }; - template <> - struct tiled_partition_impl<1, 
thread_block_tile<1> > : public thread_block_tile<1> { - _CG_QUALIFIER tiled_partition_impl(thread_block_tile<1> const&) : thread_block_tile<1>() {} - }; - -}; - -/** - * tiled_partition - * - * The tiled_partition(parent) method is a collective operation that - * partitions the parent group into a one-dimensional, row-major, tiling of subgroups. - * - * A total of ((size(parent)/tilesz) subgroups will be created, - * therefore the parent group size must be evenly divisible by the tilesz. - * The allow parent groups are thread_block or thread_block_tile. - * - * The implementation may cause the calling thread to wait until all the members - * of the parent group have invoked the operation before resuming execution. - * - * Functionality is limited to native hardware sizes, 1/2/4/8/16/32. - * The size(parent) must be greater than the template Size parameter - * otherwise the results are undefined. - */ -template -_CG_QUALIFIER thread_block_tile tiled_partition(const ParentT& g) -{ - return (__internal::tiled_partition_impl(g)); -} - -_CG_END_NAMESPACE - -# endif /* ! (__cplusplus, __CUDACC__) */ - -#endif /* !_COOPERATIVE_GROUPS_H_ */ diff --git a/GraphBLAS/CUDA/templates/cooperative_groups_helpers.h b/GraphBLAS/CUDA/templates/cooperative_groups_helpers.h deleted file mode 100755 index f1c499f62e..0000000000 --- a/GraphBLAS/CUDA/templates/cooperative_groups_helpers.h +++ /dev/null @@ -1,286 +0,0 @@ - /* Copyright 1993-2016 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * The source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * The Licensed Deliverables contained herein are PROPRIETARY and - * CONFIDENTIAL to NVIDIA and are being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. 
Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. 
- */ - -/* -** Define: _CG_VERSION -*/ -# define _CG_VERSION 1000 - -/* -** Define: _CG_ABI_VERSION -*/ -# ifndef _CG_ABI_VERSION -# define _CG_ABI_VERSION 1 -# endif - -/* -** Define: _CG_ABI_EXPERIMENTAL -** Desc: If enabled, sets all features enabled (ABI-breaking or experimental) -*/ -# if defined(_CG_ABI_EXPERIMENTAL) -# endif - -# define _CG_CONCAT_INNER(x, y) x ## y -# define _CG_CONCAT_OUTER(x, y) _CG_CONCAT_INNER(x, y) -# define _CG_NAMESPACE _CG_CONCAT_OUTER(__v, _CG_ABI_VERSION) - -# define _CG_BEGIN_NAMESPACE \ - namespace cooperative_groups { namespace _CG_NAMESPACE { -# define _CG_END_NAMESPACE \ - }; using namespace _CG_NAMESPACE; }; - -# if !defined(_CG_STATIC_QUALIFIER) -# define _CG_STATIC_QUALIFIER static __forceinline__ __device__ -# endif -# if !defined(_CG_QUALIFIER) -# define _CG_QUALIFIER __forceinline__ __device__ -# endif - -# if (__CUDA_ARCH__ >= 600) || !defined(__CUDA_ARCH__) -# define _CG_HAS_GRID_GROUP -# endif -# if (__CUDA_ARCH__ >= 600) || !defined(__CUDA_ARCH__) -# define _CG_HAS_MULTI_GRID_GROUP -# endif -# if (__CUDA_ARCH__ >= 700) || !defined(__CUDA_ARCH__) -# define _CG_HAS_MATCH_COLLECTIVE -# endif -// Has __half and __half2 -// Only usable if you include the cuda_fp16.h extension, and -// _before_ including cooperative_groups.h -# ifdef __CUDA_FP16_TYPES_EXIST__ -# define _CG_HAS_FP16_COLLECTIVE -# endif - -/* -** Define: CG_DEBUG -** What: Enables various runtime safety checks -*/ -#if defined(__CUDACC_DEBUG__) && !defined(_CG_DEBUG) -# define _CG_DEBUG 1 -#endif - -#if defined(_CG_DEBUG) && (_CG_DEBUG == 1) && !defined(NDEBUG) -# include -# define _CG_ASSERT(x) assert((x)); -# define _CG_ABORT() assert(0); -#else -# define _CG_ASSERT(x) -# define _CG_ABORT() __trap(); -#endif - -_CG_BEGIN_NAMESPACE - -namespace __internal { - - enum groupType { - CoalescedTile, - Coalesced, - ThreadBlock, - Grid, - MultiGrid, - }; - -#if defined(_CG_HAS_GRID_GROUP) - - namespace grid { - - _CG_STATIC_QUALIFIER unsigned long long 
get_intrinsic_handle() - { - return (cudaCGGetIntrinsicHandle(cudaCGScopeGrid)); - } - - _CG_STATIC_QUALIFIER void sync(const unsigned long long handle) - { - cudaCGSynchronizeGrid(handle, 0); - } - - _CG_STATIC_QUALIFIER unsigned int size(const unsigned long long handle) - { - return (blockDim.z * gridDim.z) * - (blockDim.y * gridDim.y) * - (blockDim.x * gridDim.x); - } - - _CG_STATIC_QUALIFIER unsigned int thread_rank(const unsigned long long handle) - { - unsigned int blkIdx = ((blockIdx.z * gridDim.y * gridDim.x) + - (blockIdx.y * gridDim.x) + - blockIdx.x); - return (blkIdx * (blockDim.x * blockDim.y * blockDim.z) + - ((threadIdx.z * blockDim.y * blockDim.x) + - (threadIdx.y * blockDim.x) + - threadIdx.x)); - } - - _CG_STATIC_QUALIFIER dim3 grid_dim() - { - return (dim3(gridDim.x, gridDim.y, gridDim.z)); - } - }; - -#endif - -#if defined(_CG_HAS_MULTI_GRID_GROUP) - - namespace multi_grid { - - _CG_STATIC_QUALIFIER unsigned long long get_intrinsic_handle() - { - return (cudaCGGetIntrinsicHandle(cudaCGScopeMultiGrid)); - } - - _CG_STATIC_QUALIFIER void sync(const unsigned long long handle) - { - cudaError_t err = cudaCGSynchronize(handle, 0); - } - - _CG_STATIC_QUALIFIER unsigned int size(const unsigned long long handle) - { - unsigned int numThreads = 0; - cudaCGGetSize(&numThreads, NULL, handle); - return numThreads; - } - - _CG_STATIC_QUALIFIER unsigned int thread_rank(const unsigned long long handle) - { - unsigned int threadRank = 0; - cudaCGGetRank(&threadRank, NULL, handle); - return threadRank; - } - - _CG_STATIC_QUALIFIER unsigned int grid_rank(const unsigned long long handle) - { - unsigned int gridRank = 0; - cudaCGGetRank(NULL, &gridRank, handle); - return gridRank; - } - - _CG_STATIC_QUALIFIER unsigned int num_grids(const unsigned long long handle) - { - unsigned int numGrids = 0; - cudaCGGetSize(NULL, &numGrids, handle); - return numGrids; - } - - }; - -#endif - - namespace cta { - - _CG_STATIC_QUALIFIER void sync() - { - __barrier_sync(0); - } - - 
_CG_STATIC_QUALIFIER unsigned int size() - { - return (blockDim.x * blockDim.y * blockDim.z); - } - - _CG_STATIC_QUALIFIER unsigned int thread_rank() - { - return ((threadIdx.z * blockDim.y * blockDim.x) + - (threadIdx.y * blockDim.x) + - threadIdx.x); - } - - _CG_STATIC_QUALIFIER dim3 group_index() - { - return (dim3(blockIdx.x, blockIdx.y, blockIdx.z)); - } - - _CG_STATIC_QUALIFIER dim3 thread_index() - { - return (dim3(threadIdx.x, threadIdx.y, threadIdx.z)); - } - - _CG_STATIC_QUALIFIER dim3 block_dim() - { - return (dim3(blockDim.x, blockDim.y, blockDim.z)); - } - - }; - - _CG_STATIC_QUALIFIER unsigned int laneid() - { - unsigned int laneid; - asm volatile("mov.u32 %0, %%laneid;" : "=r"(laneid)); - return laneid; - } - - _CG_STATIC_QUALIFIER unsigned int warpsz() - { - unsigned int warpSize; - asm volatile("mov.u32 %0, WARP_SZ;" : "=r"(warpSize)); - return warpSize; - } - - _CG_STATIC_QUALIFIER unsigned int lanemask32_eq() - { - unsigned int lanemask32_eq; - asm volatile("mov.u32 %0, %%lanemask_eq;" : "=r"(lanemask32_eq)); - return (lanemask32_eq); - } - - _CG_STATIC_QUALIFIER unsigned int lanemask32_lt() - { - unsigned int lanemask32_lt; - asm volatile("mov.u32 %0, %%lanemask_lt;" : "=r"(lanemask32_lt)); - return (lanemask32_lt); - } - - _CG_STATIC_QUALIFIER void abort() - { - _CG_ABORT(); - } - -}; // !Namespace internal - -_CG_END_NAMESPACE diff --git a/GraphBLAS/CUDA/test/AxB_dot3_cuda_tests.cu b/GraphBLAS/CUDA/test/AxB_dot3_cuda_tests.cpp similarity index 100% rename from GraphBLAS/CUDA/test/AxB_dot3_cuda_tests.cu rename to GraphBLAS/CUDA/test/AxB_dot3_cuda_tests.cpp diff --git a/GraphBLAS/CUDA/test/GpuTimer.h b/GraphBLAS/CUDA/test/GpuTimer.h index 63c3e1aaf4..014d7616de 100644 --- a/GraphBLAS/CUDA/test/GpuTimer.h +++ b/GraphBLAS/CUDA/test/GpuTimer.h @@ -2,6 +2,7 @@ #ifndef __GPU_TIMER_H__ #define __GPU_TIMER_H__ +#include struct GpuTimer { cudaEvent_t start; diff --git a/GraphBLAS/CUDA/test/cuda_tests_template.cpp 
b/GraphBLAS/CUDA/test/cuda_tests_template.cpp index 1a20a5c6cc..e6256bb340 100644 --- a/GraphBLAS/CUDA/test/cuda_tests_template.cpp +++ b/GraphBLAS/CUDA/test/cuda_tests_template.cpp @@ -12,6 +12,7 @@ #include #include #include "jitTestFactory.hpp" +#include "../GB_cuda_buckets.h" //Test instances and groupings diff --git a/GraphBLAS/CUDA/test/dataFactory.hpp b/GraphBLAS/CUDA/test/dataFactory.hpp index 3007dce662..52932423a4 100644 --- a/GraphBLAS/CUDA/test/dataFactory.hpp +++ b/GraphBLAS/CUDA/test/dataFactory.hpp @@ -8,60 +8,11 @@ #include #include "GB.h" -#include "../type_convert.hpp" +#include "../GB_cuda_type_wrap.hpp" #include "../GB_Matrix_allocate.h" #include "test_utility.hpp" +#include "../GB_cuda_error.h" -static const char *_cudaGetErrorEnum(cudaError_t error) { - return cudaGetErrorName(error); -} - -template -void check(T result, char const *const func, const char *const file, - int const line) { - if (result) { - fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line, - static_cast(result), _cudaGetErrorEnum(result), func); - exit(EXIT_FAILURE); - } -} - -#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) - -// This will output the proper error string when calling cudaGetLastError -#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__) - -inline void __getLastCudaError(const char *errorMessage, const char *file, - const int line) { - cudaError_t err = cudaGetLastError(); - - if (cudaSuccess != err) { - fprintf(stderr, - "%s(%i) : getLastCudaError() CUDA error :" - " %s : (%d) %s.\n", - file, line, errorMessage, static_cast(err), - cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} - -// This will only print the proper error string when calling cudaGetLastError -// but not exit program incase error detected. 
-#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__) - -inline void __printLastCudaError(const char *errorMessage, const char *file, - const int line) { - cudaError_t err = cudaGetLastError(); - - if (cudaSuccess != err) { - fprintf(stderr, - "%s(%i) : getLastCudaError() CUDA error :" - " %s : (%d) %s.\n", - file, line, errorMessage, static_cast(err), - cudaGetErrorString(err)); - } -} -#define CHECK_CUDA(call) checkCudaErrors( call ) // CAUTION: This assumes our indices are small enough to fit into a 32-bit int. inline std::int64_t gen_key(std::int64_t i, std::int64_t j) { @@ -70,8 +21,8 @@ inline std::int64_t gen_key(std::int64_t i, std::int64_t j) { //Vector generators template -void fillvector_linear( int N, T *vec) { - for (int i = 0; i< N; ++i) vec[i] = T(i); +void fillvector_linear( int N, T *vec, int start=0) { + for (int i = start; i< N+start; ++i) vec[i] = T(i); } template void fillvector_constant( int N, T *vec, T val) { @@ -120,7 +71,7 @@ class matrix : public Managed { } void alloc() { - GrB_Type type = cuda::to_grb_type(); + GrB_Type type = cuda::jit::to_grb_type(); GRB_TRY (GrB_Matrix_new (&mat, type, nrows_, ncols_)) ; // GxB_Matrix_Option_set (mat, GxB_SPARSITY_CONTROL, @@ -162,8 +113,9 @@ class matrix : public Managed { std::mt19937 r(seed); std::uniform_real_distribution dis(0.0, 1.0); - if (nnz < 0) + if (nnz < 0 || inv_sparsity == 1.) 
{ + std::cout<<"filling dense"< (mat, x, i, j) ; + cuda::jit::set_element (mat, x, i, j) ; // A (j,i) = x - cuda::set_element (mat, x, j, i) ; + cuda::jit::set_element (mat, x, j, i) ; } else { // A (i,j) = x - cuda::set_element (mat, x, i, j) ; + cuda::jit::set_element (mat, x, i, j) ; } } } } else { + std::cout<<"filling sparse"< row_lookup; unordered_set key_lookup; + for ( int co = 0; co < 2*nrows_; co++ ) + { + GrB_Index i = ((GrB_Index) (dis(r) * nrows_)) % ((GrB_Index) nrows_) ; + row_lookup.insert( i ); + } + int remain= nnz; //countdown to done + + while ( remain > 0) + { + std::cout<< remain<<" nonzeroes left to fill.."< 0 ) + { + GrB_Index j = ((GrB_Index) (dis(r) * ncols_)) % ((GrB_Index) ncols_) ; + if (key_lookup.count( gen_key(i,j) ) == 1) continue; + if (no_self_edges && (i == j)) continue ; + + key_lookup.insert( gen_key(i, j) ); + col_guess--; + remain= (nnz- key_lookup.size() ); + if (remain <= 0) break; + if (make_symmetric) { + // A (j,i) = x + if (key_lookup.count( gen_key( j, i) ) == 0) + { + key_lookup.insert( gen_key( j, i) ) ; + col_guess--; + remain= (nnz- key_lookup.size() ); + } + } + if (remain <= 0) break; + } + if (remain <= 0) break; + //std::cout<< remain<<" nonzeroes left..."< 0 + /* while(key_lookup.size() < nnz) { GrB_Index i = ((GrB_Index) (dis(r) * nrows_)) % ((GrB_Index) nrows_) ; GrB_Index j = ((GrB_Index) (dis(r) * ncols_)) % ((GrB_Index) ncols_) ; - key_lookup.insert(gen_key(i, j)); - } + key_lookup.insert( gen_key(i, j) ); + if (make_symmetric) { + // A (j,i) = x + key_lookup.insert( gen_key( j, i) ) ; + } + } */ for (int64_t k : key_lookup) { GrB_Index i = k >> 32; GrB_Index j = k & 0x0000ffff; - if (no_self_edges && (i == j)) continue ; - T x = (T)(dis(r) * (val_max - val_min)) + (T)val_min ; + T x = (T)val_min + (T)(dis(r) * (val_max - val_min)) ; // A (i,j) = x - cuda::set_element (mat, x, i, j) ; + cuda::jit::set_element (mat, x, i, j) ; if (make_symmetric) { // A (j,i) = x - cuda::set_element(mat, x, j, i) ; + 
cuda::jit::set_element(mat, x, j, i) ; } } } @@ -217,7 +216,7 @@ class matrix : public Managed { GRB_TRY (GxB_Matrix_Option_set (mat, GxB_SPARSITY_CONTROL, gxb_sparsity_control)) ; GRB_TRY (GxB_Matrix_Option_set(mat, GxB_FORMAT, gxb_format)); GRB_TRY (GrB_Matrix_nvals ((GrB_Index *) &nnz, mat)) ; - GRB_TRY (GxB_Matrix_fprint (mat, "my mat", GxB_SHORT_VERBOSE, stdout)) ; + //GRB_TRY (GxB_Matrix_fprint (mat, "my random mat", GxB_SHORT_VERBOSE, stdout)) ; bool iso ; GRB_TRY (GxB_Matrix_iso (&iso, mat)) ; @@ -340,7 +339,7 @@ class SpGEMM_problem_generator { BucketStart[b] = BucketStart[b-1] + (Cnz / 12); //std::cout<< "bucket "<< b<<" starts at "< p1lF(mysemiringfactory); + phase1launchFactory p1lF(mysemiringfactory); GpuTimer kernTimer; kernTimer.Start(); @@ -166,12 +166,14 @@ bool test_AxB_phase1_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, GrB_M kernTimer.Stop(); std::cout<<"returned from phase1 kernel "<(Nanobuckets, nanobuckets_size, "Nanobuckets"); -// print_array(Blockbucket, blockbuckets_size, "Blockbucket"); -// std::cout<<"==== phase1 done=============================" <(Nanobuckets, nanobuckets_size, "Nanobuckets"); + print_array(Blockbucket, blockbuckets_size, "Blockbucket"); + std::cout<<"==== phase1 done=============================" < p2lF; - phase2endlaunchFactory p2elF; + phase2launchFactory p2lF; + phase2endlaunchFactory p2elF; SpGEMM_problem_generator G(N, N); int64_t Annz = N*N; @@ -227,12 +229,11 @@ bool test_AxB_phase2_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz) fillvector_constant(NBUCKETS * ntasks, blockbucket, (int64_t)1); fillvector_constant(NBUCKETS, bucketp, (int64_t)1); -// print_array(nanobuckets, NBUCKETS*nthrd*ntasks, "nanobuckets"); -// print_array(blockbucket, NBUCKETS*ntasks, "blockbucket"); + print_array(nanobuckets, NBUCKETS*nthrd*ntasks, "nanobuckets"); + print_array(blockbucket, NBUCKETS*ntasks, "blockbucket"); // // // launch phase2 (just with p2ntasks as the # of tasks) - p2lF.jitGridBlockLaunch(nanobuckets, 
blockbucket, - bucketp, bucket, offset, M); + p2lF.jitGridBlockLaunch(blockbucket, offset, M); // // // do the reduction between phase2 and phase2end int64_t s= 0; @@ -251,26 +252,24 @@ bool test_AxB_phase2_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz) // std::cout<<"returned from phase2 kernel "<(bucketp, NBUCKETS, "bucketp"); -// print_array(bucket, mnz, "bucket"); -// std::cout<<"phase2 kernel done =================="<(bucketp, NBUCKETS, "bucketp"); + print_array(bucket, mnz, "bucket"); + std::cout<<"phase2 kernel done =================="< -void make_grb_matrix(GrB_Matrix &mat, std::vector &indptr, std::vector &indices, std::vector &data, +void make_grb_matrix(GrB_Matrix &mat, int64_t n_rows, int64_t n_cols, std::vector &indptr, std::vector &indices, std::vector &data, int gxb_sparsity_control = GxB_SPARSE, int gxb_format = GxB_BY_ROW) { - GrB_Type type = cuda::to_grb_type(); + GrB_Type type = cuda::jit::to_grb_type(); - int64_t n_rows = indptr.size() -1; - int64_t n_cols = n_rows; GRB_TRY (GrB_Matrix_new (&mat, type, n_rows, n_cols)) ; for(int64_t row = 0; row < n_rows; ++row) { @@ -282,7 +281,7 @@ void make_grb_matrix(GrB_Matrix &mat, std::vector &indptr, std::vector< GrB_Index j = (GrB_Index) indices[offset]; T x = data[offset]; - cuda::set_element (mat, x, i, j) ; + cuda::jit::set_element (mat, x, i, j) ; } } @@ -309,12 +308,39 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, // FIXME: Allow the adaptive tests in this guy -// N = 20; - //Generate test data and setup for using a jitify kernel with 'bucket' interface // The testBucket arg tells the generator which bucket we want to exercise - int64_t Annz = N * 5; - int64_t Bnnz = N*5; + int64_t Annz; + int64_t Bnnz; + + switch(TB) { + case GB_BUCKET_DNDN: + Annz = N * N; + Bnnz = N * N; + break; + case GB_BUCKET_SPDN: + Annz = N * N; + Bnnz = N * 5; + break; + case GB_BUCKET_VSSP: + Annz = N * 2; + Bnnz = N * 10; + break; + case GB_BUCKET_VSVS_4: + case 
GB_BUCKET_VSVS_16: + case GB_BUCKET_VSVS_64: + case GB_BUCKET_VSVS_256: + Annz = N * 2; + Bnnz = N * 4; + break; + case GB_BUCKET_MERGEPATH: + Annz = N * 5; + Bnnz = N * 2; + break; + default: + printf("Bucket not yet being tested!\n"); + exit(1); + } int64_t Cnz = N; float Cnzpercent = (float) Cnz/(N*N); @@ -347,7 +373,7 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, G.init_A(Annz, GxB_SPARSE, GxB_BY_ROW, 543210, 0, 2); // std::cout << "Filling B" << std::endl; - G.init_B(-1, GxB_SPARSE, GxB_BY_ROW, 32, 0, 2); + G.init_B(Bnnz, GxB_SPARSE, GxB_BY_ROW, 32, 0, 2); /** * For testing, we need to create our output C and configure @@ -360,15 +386,11 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, GrB_Matrix A = G.getA(); GrB_Matrix B = G.getB(); - GRB_TRY (GxB_Matrix_fprint (A, "A", GxB_SHORT_VERBOSE, stdout)) ; - GRB_TRY (GxB_Matrix_fprint (B, "B", GxB_SHORT_VERBOSE, stdout)) ; - GRB_TRY (GxB_Matrix_fprint (M, "M", GxB_SHORT_VERBOSE, stdout)) ; +// GRB_TRY (GxB_Matrix_fprint (A, "A", GxB_SHORT_VERBOSE, stdout)) ; +// GRB_TRY (GxB_Matrix_fprint (B, "B", GxB_SHORT_VERBOSE, stdout)) ; +// GRB_TRY (GxB_Matrix_fprint (M, "M", GxB_SHORT_VERBOSE, stdout)) ; // GRB_TRY (GxB_Matrix_fprint (C, "C", GxB_SHORT_VERBOSE, stdout)) ; // - T_C *Cx = (T_C*)C->x; - T_A *Ax = (T_A*)A->x; - T_B *Bx = (T_B*)B->x; - std::cout << "Building semiring factgory" << std::endl; GB_cuda_semiring_factory mysemiringfactory = GB_cuda_semiring_factory ( ) ; GrB_Semiring mysemiring; @@ -389,7 +411,6 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, GB_sparsity(A), GB_sparsity(B) ) ; - int zc_valid = 0; bool result = false; /** @@ -400,15 +421,11 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, int chunk_size = 128; int number_of_sms = GB_Global_gpu_sm_get (0); - int nblks = ( GB_nnz (M) + chunk_size - 1)/chunk_size; - int ntasks = GB_IMIN( nblks, 128 * number_of_sms); - int 
nthrd = 32; int64_t *bucketp = (int64_t*)rmm_wrap_malloc((NBUCKETS+1) * sizeof (int64_t)); - bucketp[1] = 0; + CHECK_CUDA(cudaMemset(bucketp, 0, (NBUCKETS+1)*sizeof(int64_t))); int64_t *bucket = (int64_t*)rmm_wrap_malloc(Cnz * sizeof (int64_t)); - int64_t *offset = (int64_t*)rmm_wrap_malloc(NBUCKETS * sizeof (int64_t)); /** * Run Phase 3: Execute dot3 on all buckets @@ -425,40 +442,23 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, if (nvecs > 0) std::cout<< "bucket "< lF(mysemiringfactory, (GB_bucket_code)b); - lF.jitGridBlockLaunch(b_start, b_end, bucketp, Bucket, C, M, A, B); - kernTimer.Stop(); - - std::cout<<"returned from kernel "<(bucketp, NBUCKETS+1, "bucketp"); - // printing manually since (I think) the jumbled form is causing issues for the standard GB_Matrix printer -// std::cout << "Printing matrix C:" << std::endl; - -// zc_valid = C->zombie_count; -// C->zombie_count = 0; -// for (int i =0 ; i< GB_nnz(C); ++i) { -// //std::cout<<"Cx[i] = "<i[i]; -// } + kernTimer.Stop(); -// G.loadCj(); + std::cout<<"returned from kernel "<(); + GrB_Type type = cuda::jit::to_grb_type(); GRB_TRY (GrB_Matrix_new (&C_actual, type, N, N)) ; // ensure the GPU is not used @@ -473,9 +473,12 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, GRB_TRY (GxB_Matrix_fprint (M, "M actual", GxB_SHORT_VERBOSE, stdout)); GRB_TRY (GxB_Matrix_fprint (A, "A actual", GxB_SHORT_VERBOSE, stdout)); GRB_TRY (GxB_Matrix_fprint (B, "B actual", GxB_SHORT_VERBOSE, stdout)); - GRB_TRY (GxB_Matrix_fprint (C, "C", GxB_SHORT_VERBOSE, stdout)); - GRB_TRY (GxB_Matrix_fprint (C_actual, "C_actual", GxB_SHORT_VERBOSE, stdout)); + GRB_TRY(GrB_Matrix_wait(C, GrB_MATERIALIZE)); + GRB_TRY(GrB_Matrix_wait(C_actual, GrB_MATERIALIZE)); + + GRB_TRY (GxB_Matrix_fprint (C, "C GPU", GxB_COMPLETE, stdout)); + GRB_TRY (GxB_Matrix_fprint (C_actual, "C_actual", GxB_COMPLETE, stdout)); // compare double tol = 0 ; GrB_Index nvals1 = 0, nvals2 = 0 ; @@ -491,7 
+494,6 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, nrows, ncols)) ; GrB_BinaryOp op = NULL; GrB_UnaryOp op_abs = NULL ; - GrB_Monoid monoid_sum = NULL ; if (type == GrB_BOOL ) op = GrB_EQ_BOOL ; else if (type == GrB_INT8 ) op = GrB_EQ_INT8 ; else if (type == GrB_INT16 ) op = GrB_EQ_INT16 ; @@ -522,6 +524,7 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, op_abs = GxB_ABS_FC64 ; } + // Diff = C - C_actual GrB_Matrix Diff ; GRB_TRY (GrB_Matrix_new (&Diff, GrB_FP64, nrows, ncols)) ; @@ -537,7 +540,7 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, GRB_TRY (GrB_Matrix_eWiseMult_BinaryOp (T, NULL, NULL, op, C, C_actual, NULL)) ; GrB_Index nvals3 = 1 ; - GRB_TRY (GxB_Matrix_fprint (T, "T actual", GxB_COMPLETE, stdout)); + GRB_TRY (GxB_Matrix_fprint (T, "T actual", GxB_SHORT_VERBOSE, stdout)); GRB_TRY (GrB_Matrix_nvals (&nvals3, T)) ; if (nvals1 != nvals3) { printf ("!!\n") ; abort ( ) ; } bool is_same = false ; @@ -559,14 +562,126 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, rmm_wrap_free(bucket); rmm_wrap_free(bucketp); - rmm_wrap_free(offset); -// G.del(); + G.del(); return result; +} + +template +bool test_reduce_factory(unsigned int N, GrB_Monoid monoid ) { + //std::cout<<" alloc'ing data and output"< indptr(N+1); + std::vector index(N); + std::vector d_data(N); + + indptr[N] = N; + fillvector_linear((int)N, indptr.data(), (int64_t)0); + fillvector_constant((int)N, index.data(), (int64_t)1); + fillvector_linear ( N, d_data.data()); + + GrB_Type t = cuda::jit::to_grb_type(); + + GrB_Matrix A; + make_grb_matrix(A, N, N, indptr, index, d_data, GxB_SPARSE, GxB_BY_ROW); + + GRB_TRY (GrB_Matrix_wait (A, GrB_MATERIALIZE)) ; + GRB_TRY (GxB_Matrix_fprint (A, "A", GxB_COMPLETE, stdout)); + + T actual; + GB_cuda_reduce( A, &actual, monoid ); + + GrB_Vector v; + GrB_Vector_new(&v, t, N); + + // Just sum in 
place for now (since we are assuming sum) + int sum = 0; + for(int i = 0; i < N; ++i) { + sum+= d_data[i]; + cuda::jit::vector_set_element(v, i, d_data[i]); + } + printf("Sum: %d\n", sum); + + GRB_TRY (GxB_Global_Option_set (GxB_GLOBAL_GPU_CONTROL, GxB_GPU_NEVER)) ; + + printf("Invoking grb reduce\n"); + T expected; + GRB_TRY(cuda::jit::vector_reduce(&expected, v, monoid)); + printf("Done.\n"); + + GRB_TRY (GxB_Global_Option_set (GxB_GLOBAL_GPU_CONTROL, GxB_GPU_ALWAYS)) ; + if(expected != actual) { + std::cout << "results do not match: reduced=" << expected << ", actual=" << actual << std::endl; + exit(1); + } else { + std::cout << "Results matched!" << std::endl; + } + + return expected == actual; } +//bool test_triangle_counting() { +// +// // Hardcoding int64_t for now +// TestData data = *make_karate_tricount(); +// +// GrB_Monoid monoid = GrB_PLUS_MONOID_INT64; +// GrB_BinaryOp binop = GrB_TIMES_INT64; +// std::cout << "Creating problem gen" << std::endl; +// N = data.A_indptr.size()-1; +// +// GrB_Matrix A; +// GrB_Matrix B; +// GrB_Matrix C; +// GrB_Matrix M; +// +// make_grb_matrix(A, data.A_indptr, data.A_indices, data.A_data, GxB_SPARSE); +// make_grb_matrix(B, data.B_indptr, data.B_indices, data.B_data, GxB_FULL, GxB_BY_ROW); +// make_grb_matrix(C, data.C_indptr, data.C_indices, data.C_data); +// make_grb_matrix(M, data.M_indptr, data.M_indices, data.M_data); +// +// GrB_Semiring mysemiring; +// auto grb_info = GrB_Semiring_new(&mysemiring, monoid, binop); +// GRB_TRY (grb_info) ; +// +// mysemiringfactory.semiring_factory ( mysemiring, false, +// C->type, M->type, +// A->type, B->type, +// true, // matrix types +// false, +// GB_sparsity(C), +// GB_sparsity(M), +// GB_sparsity(A), +// GB_sparsity(B) +// ) ; +// +// bool result = false; +// +// /** +// * Run Phase 1: Compute nanobuckets and blockbuckets +// */ +// const int64_t mnz = GB_nnz (M) ; +// +// int chunk_size = 128; +// +// // Use GrB_DESC_S for structural because dot3 mask will never be 
complemented +// GRB_TRY (GrB_mxm(C_actual, M, NULL, mysemiring, A, B, GrB_DESC_ST1)); +// +// GRB_TRY (GxB_Matrix_fprint (M, "M actual", GxB_SHORT_VERBOSE, stdout)); +// GRB_TRY (GxB_Matrix_fprint (A, "A actual", GxB_SHORT_VERBOSE, stdout)); +// GRB_TRY (GxB_Matrix_fprint (B, "B actual", GxB_SHORT_VERBOSE, stdout)); +// GRB_TRY (GxB_Matrix_fprint (C, "C GPU", GxB_SHORT_VERBOSE, stdout)); +// GRB_TRY (GxB_Matrix_fprint (C_actual, "C_actual", GxB_SHORT_VERBOSE, stdout)); +// +// GRB_TRY(GrB_reduce_) +// +// return result; +// +//} + + + //template //bool test_AxB_dot3_dndn_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, std::string& SEMI_RING) { //// Assumes all matrices are square so far, so only N dimension given. @@ -1800,82 +1915,6 @@ bool test_AxB_dot3_full_factory( int TB, int64_t N, int64_t Anz, int64_t Bnz, // //} // -//template -//bool test_reducefactoryUM( unsigned int N, std::string OP) { -// -// reduceFactory rF; -// -// int block(32); -// int nblock= (N + 8*block -1)/(8*block); -// int grid(nblock); -// T* d_data; -// T* output; -// -// //std::cout<<" alloc'ing data and output"< ( N, d_data); -// -// //std::cout<<" data fill complete"< = myOP_plus; -// } -// if (OP == "MIN") { -// sum = (T)std::numeric_limits::max(); -// myOpPTR = myOP_min; -// } -// if (OP == "MAX") { -// sum = (T)std::numeric_limits::min(); -// myOpPTR = myOP_max; -// } -// -// for (int i =0; i< nblock; ++i) sum = (*myOpPTR)(sum ,output[i]); -// -// T expect; -// bool result = false; -// if (OP == "PLUS") { -// expect = (T)(N*(N-1)/2); -// T temp = (sum - expect) ; -// if (temp < 0) temp = -temp ; -// //result = (temp < (T)1) ; //adjust formula for leading 0 -// EXPECT_LE(temp, 1); -// } -// else if (OP == "MAX") { -// expect = (T)(N-1); -// //result = (sum)== (T)(N-1) ; //max is N-1 -// EXPECT_EQ( sum , (T)(N-1) ); -// -// } -// else if (OP == "MIN") { -// expect = (T)0; -// //result = (sum)== (T)(0) ; //min is 0 -// EXPECT_EQ( sum , (T)(0) ); -// } -// else expect = (T) 0; -// 
std::cout < //bool test_dndotfactoryUM( unsigned int N, std::string SEMI_RING) { diff --git a/GraphBLAS/CUDA/test/jitTestFactory_reduce.hpp b/GraphBLAS/CUDA/test/jitTestFactory_reduce.hpp deleted file mode 100644 index 307df82f94..0000000000 --- a/GraphBLAS/CUDA/test/jitTestFactory_reduce.hpp +++ /dev/null @@ -1,112 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include -#include -#include -#include -#include "../jitFactory.hpp" -//#include "GB_binary_search.h" -#include "GpuTimer.h" -#include "../../rmm_wrap/rmm_wrap.h" -#include -#include "dataFactory.hpp" - -//reduceNonZombiesWarp kernel -template -bool test_reduce_non_zombies_warp_factory( unsigned int, std::string ) ; - -//Fixture to generate valid inputs and hold them for tests -class ReduceNonZombiesWarpTest : public ::testing::Test -{ - void SetUp() - { - - - } - - void TearDown() - { - - } - -}; - -template -bool test_reduce_non_zombies_warp_factory( unsigned int N, std::string OP) { - - reduceFactory rF; - - int block(32); - int nblock= (N + 8*block -1)/(8*block); - int grid(nblock); - T* d_data; - T* output; - - //std::cout<<" alloc'ing data and output"< ( N, d_data); - - std::cout<<" data fill complete"< = myOP_plus; - } - if (OP == "MIN") { - sum = (T)std::numeric_limits::max(); - myOpPTR = myOP_min; - } - if (OP == "MAX") { - sum = (T)std::numeric_limits::min(); - myOpPTR = myOP_max; - } - - for (int i =0; i< nblock; ++i) sum = (*myOpPTR)(sum ,output[i]); - - T expect; - bool result = true; - if (OP == "PLUS") { - expect = (T)(N*(N-1)/2); - T temp = (sum - expect) ; - if (temp < 0) temp = -temp ; - //result = (temp < (T)1) ; //adjust formula for leading 0 - EXPECT_LE(temp, 1); - } - else if (OP == "MAX") { - expect = (T)(N-1); - //result = (sum)== (T)(N-1) ; //max is N-1 - EXPECT_EQ( sum , (T)(N-1) ); - - } - else if (OP == "MIN") { - expect = (T)0; - //result = (sum)== (T)(0) ; //min is 0 - EXPECT_EQ( sum , (T)(0) ); - } - else expect = (T) 0; - std::cout <add (built-in) + 
GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring +nanobuckets_size: 384 +blockbuckets_size: 12 + rmm_wrap_alloc 3072 bytes + rmm_wrap_alloc 256 bytes +A TYpe: 0x7f2028b56f40 +B TYpe: 0x7f2028b56f40 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed +GB_jit_AxB_phase1 +#include "/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h" +#include "GB_jit_AxB_phase1.cuh" + jit_cache get program 
GB_jit_AxB_phase1 +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_phase1 + got kernel instance AxB_phase1_bool +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_phase1_bool +--------------------------------------- +--- Linker for void AxB_phase1(long long*, long long*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*) --- +--------------------------------------- +info : 0 bytes gmem +info : Function properties for '_Z10AxB_phase1IbEvPxS0_P16GB_Matrix_opaqueS2_S2_S2_': +info : used 199 registers, 4576 stack, 3104 bytes smem, 400 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z10AxB_phase1IbEvPxS0_P16GB_Matrix_opaqueS2_S2_S2_<<<1,32,0,0>>>(long*,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*) +returned from phase1 kernel 10.4223ms +Printing Nanobuckets +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +Printing Blockbucket +0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +==== phase1 done============================= +[ OK ] AxB_dot3_tests_PLUS_TIMES_1.tinyxtinyPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t (11 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_1.smallxsmallPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t +found device 0 +inside fill, using seed 12345 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 32768 bytes + rmm_wrap_alloc 32768 bytes + rmm_wrap_alloc 65536 bytes + rmm_wrap_alloc 65536 bytes + rmm_wrap_alloc 131072 bytes + rmm_wrap_alloc 262144 bytes + rmm_wrap_alloc 262144 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8192 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +inside fill, using seed 54321 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + 
rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 8192 bytes + rmm_wrap_alloc 8192 bytes + rmm_wrap_alloc 8192 bytes + rmm_wrap_alloc 8192 bytes +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 16384 bytes +1024 slots to fill +all pairs to bucket 5, no filling +done assigning buckets + calling stringify semiring: 0x7f1fea02dd00 +inside enumify: 0x7f1fea02dd00 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 
Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring +nanobuckets_size: 3072 +blockbuckets_size: 96 + rmm_wrap_alloc 24576 bytes + rmm_wrap_alloc 768 bytes +A TYpe: 0x7f2028b56f40 +B TYpe: 0x7f2028b56f40 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed +GB_jit_AxB_phase1 +#include "/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h" +#include "GB_jit_AxB_phase1.cuh" + jit_cache get program GB_jit_AxB_phase1 +found memory-cached prog GB_jit_AxB_phase1 + got kernel instance AxB_phase1_bool +found memory-cached prog AxB_phase1_bool +Launching _Z10AxB_phase1IbEvPxS0_P16GB_Matrix_opaqueS2_S2_S2_<<<8,32,0,0>>>(long*,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*) +returned from phase1 kernel 1.93946ms +Printing Nanobuckets +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 
48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 
100, 104, 108, 112, 116, 120, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +Printing Blockbucket +0, 0, 0, 0, 0, 0, 0, 0, 248, 248, 248, 248, 248, 248, 248, 248, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. 
+==== phase1 done============================= +[ OK ] AxB_dot3_tests_PLUS_TIMES_1.smallxsmallPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t (235 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_1.tinyxtinyPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t +found device 0 +inside fill, using seed 12345 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 54321 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes +32 slots to fill +all pairs to bucket 5, no filling +done assigning buckets + calling stringify semiring: 0x7f1fea02db00 +inside enumify: 0x7f1fea02db00 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring +nanobuckets_size: 384 +blockbuckets_size: 12 + rmm_wrap_alloc 3072 bytes + rmm_wrap_alloc 256 bytes +A TYpe: 0x7f2028b56f40 +B TYpe: 0x7f2028b56f40 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed +GB_jit_AxB_phase1 +#include "/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h" +#include "GB_jit_AxB_phase1.cuh" + jit_cache get program GB_jit_AxB_phase1 +found memory-cached prog GB_jit_AxB_phase1 + got kernel instance AxB_phase1_int32_t +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_phase1_int32_t +--------------------------------------- +--- Linker for void AxB_phase1(long long*, long long*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*) --- +--------------------------------------- +info : 0 bytes gmem +info : Function properties for '_Z10AxB_phase1IiEvPxS0_P16GB_Matrix_opaqueS2_S2_S2_': +info : used 199 registers, 4576 stack, 3104 bytes smem, 400 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z10AxB_phase1IiEvPxS0_P16GB_Matrix_opaqueS2_S2_S2_<<<1,32,0,0>>>(long*,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*) +returned from phase1 kernel 9.86829ms +Printing Nanobuckets +0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 8, 9, 10, 11, 12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, 6, 7, 8, 9, 9, 9, 10, 10, 10, 10, 10, 10, 11, 12, 13, 14, 15, 16, 16, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +Printing Blockbucket +28, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +==== phase1 done============================= +[ OK ] AxB_dot3_tests_PLUS_TIMES_1.tinyxtinyPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t (11 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_1.smallxsmallPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t +found device 0 +inside fill, using seed 12345 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +inside fill, using seed 54321 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 
bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 8192 bytes + rmm_wrap_alloc 16384 bytes +1024 slots to fill +all pairs to bucket 5, no filling +done assigning buckets + calling stringify semiring: 0x7f1fea03ef00 +inside enumify: 0x7f1fea03ef00 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, 
mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring +nanobuckets_size: 3072 +blockbuckets_size: 96 + rmm_wrap_alloc 24576 bytes + rmm_wrap_alloc 768 bytes +A TYpe: 0x7f2028b56f40 +B TYpe: 0x7f2028b56f40 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed +GB_jit_AxB_phase1 +#include "/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h" +#include "GB_jit_AxB_phase1.cuh" + jit_cache get program GB_jit_AxB_phase1 +found memory-cached prog GB_jit_AxB_phase1 + got kernel instance AxB_phase1_int32_t +found memory-cached prog AxB_phase1_int32_t +Launching _Z10AxB_phase1IiEvPxS0_P16GB_Matrix_opaqueS2_S2_S2_<<<8,32,0,0>>>(long*,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*) +returned from phase1 kernel 2.08486ms +Printing Nanobuckets +0, 3, 4, 6, 9, 12, 13, 15, 18, 20, 21, 23, 26, 30, 32, 35, 37, 38, 40, 43, 45, 45, 48, 51, 54, 55, 58, 58, 61, 61, 63, 66, 0, 1, 4, 6, 7, 8, 11, 13, 14, 16, 19, 21, 22, 22, 24, 25, 27, 30, 32, 33, 35, 39, 40, 41, 42, 45, 46, 50, 51, 55, 57, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 8, 12, 15, 19, 19, 22, 23, 25, 27, 30, 32, 33, 34, 37, 39, 40, 42, 45, 48, 50, 50, 54, 57, 61, 63, 65, 67, 68, 69, 0, 2, 4, 4, 4, 5, 5, 9, 10, 13, 15, 17, 18, 20, 23, 26, 27, 29, 32, 34, 35, 36, 38, 42, 42, 43, 43, 45, 47, 49, 52, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 4, 7, 8, 9, 11, 12, 15, 16, 18, 19, 22, 
25, 27, 29, 31, 34, 35, 37, 38, 40, 41, 44, 46, 48, 50, 53, 56, 57, 58, 0, 3, 5, 8, 9, 12, 15, 17, 20, 21, 24, 26, 29, 30, 31, 33, 35, 37, 38, 41, 43, 46, 48, 51, 52, 54, 56, 58, 59, 60, 63, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 9, 11, 14, 15, 17, 18, 19, 22, 24, 25, 28, 31, 32, 33, 36, 38, 41, 45, 46, 47, 48, 50, 52, 54, 55, 55, 58, 60, 62, 0, 2, 2, 3, 5, 6, 9, 11, 14, 17, 18, 20, 23, 24, 25, 28, 31, 32, 34, 35, 35, 38, 41, 44, 46, 48, 50, 53, 57, 58, 60, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 7, 8, 12, 14, 15, 16, 20, 23, 26, 29, 32, 34, 35, 38, 40, 41, 45, 47, 48, 50, 53, 57, 59, 61, 63, 66, 68, 71, 72, 0, 1, 4, 5, 8, 8, 10, 13, 16, 16, 17, 18, 19, 20, 22, 25, 26, 28, 31, 31, 33, 36, 38, 39, 39, 41, 43, 45, 46, 48, 49, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 5, 8, 10, 11, 12, 13, 15, 16, 18, 20, 24, 24, 26, 28, 29, 30, 32, 34, 35, 35, 36, 39, 42, 43, 46, 48, 50, 52, 0, 3, 5, 9, 11, 12, 14, 17, 20, 23, 25, 28, 30, 32, 32, 36, 38, 40, 43, 46, 48, 50, 53, 57, 60, 61, 62, 65, 66, 68, 70, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 10, 11, 11, 12, 13, 13, 14, 17, 17, 20, 21, 23, 26, 29, 31, 33, 35, 35, 37, 40, 42, 46, 49, 53, 54, 58, 59, 60, 0, 1, 3, 4, 6, 9, 13, 16, 19, 23, 26, 27, 31, 32, 35, 37, 38, 39, 41, 43, 45, 49, 51, 52, 54, 54, 55, 55, 58, 58, 61, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 7, 9, 11, 13, 
15, 18, 20, 22, 24, 25, 27, 27, 28, 28, 29, 31, 35, 38, 39, 41, 44, 47, 50, 52, 54, 58, 58, 59, 59, 0, 3, 4, 5, 7, 9, 11, 13, 14, 16, 18, 20, 23, 25, 29, 32, 36, 39, 41, 41, 42, 45, 47, 48, 49, 50, 52, 54, 54, 58, 61, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +Printing Blockbucket +132, 138, 116, 124, 144, 104, 120, 118, 116, 110, 132, 124, 104, 144, 128, 130, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. 
+==== phase1 done============================= +[ OK ] AxB_dot3_tests_PLUS_TIMES_1.smallxsmallPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t (230 ms) +[----------] 4 tests from AxB_dot3_tests_PLUS_TIMES_1 (489 ms total) + +[----------] 4 tests from AxB_dot3_tests_PLUS_TIMES_2 +[ RUN ] AxB_dot3_tests_PLUS_TIMES_2.tinyxtinyPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t +found device 0 +inside fill, using seed 12345 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 54321 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 5, no filling +done assigning buckets + rmm_wrap_alloc 3072 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +nthrd: 32, ntasks: 1 +Printing nanobuckets +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +Done. +Printing blockbucket +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +Done. 
+GB_jit_AxB_phase2 +#include "GB_jit_AxB_phase2.cuh" + jit_cache get program GB_jit_AxB_phase2 +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_phase2 + got kernel instance AxB_phase2 +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_phase2 +--------------------------------------- +--- Linker for AxB_phase2(long long*, long long*, int) --- +--------------------------------------- +info : 39 bytes gmem +info : Function properties for '_Z10AxB_phase2PxS_i': +info : used 88 registers, 720 stack, 32 bytes smem, 372 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z10AxB_phase2PxS_i<<<1,32,0,0>>>(long*,long*,int) +s_0: 1, s_1=1, s_10=1, s_11=1 +GB_jit_AxB_phase2end +#include "GB_jit_AxB_phase2end.cuh" + jit_cache get program GB_jit_AxB_phase2end +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_phase2end + got kernel instance AxB_phase2end +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_phase2end +--------------------------------------- +--- Linker for AxB_phase2end(long long*, long long const*, long long const*, long long*, long long const*, GB_Matrix_opaque*, long long) --- +--------------------------------------- +info : 0 bytes gmem +info : Function properties for '_Z13AxB_phase2endPxPKxS1_S_S1_P16GB_Matrix_opaquex': +info : used 107 registers, 0 stack, 0 bytes smem, 408 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z13AxB_phase2endPxPKxS1_S_S1_P16GB_Matrix_opaquex<<<1,32,0,0>>>(long*,long*,long*,long*,long*,GB_Matrix_opaque*,long) +Printing bucketp +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, +Done. +Printing bucket +0, 16, 5, 8, 9, 12, 15, 17, 20, 21, 24, 26, 29, 30, 31, 33, 35, 37, 38, 41, 43, 46, 48, 51, 52, 54, 56, 58, 59, 60, 63, 66, +Done. 
+phase2 kernel done ================== +[ OK ] AxB_dot3_tests_PLUS_TIMES_2.tinyxtinyPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t (17 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_2.smallxsmallPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t +found device 0 +inside fill, using seed 12345 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 1024 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +inside fill, using seed 54321 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse 
+1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 4096 bytes +1024 slots to fill +all pairs to bucket 5, no filling +done assigning buckets + rmm_wrap_alloc 24576 bytes + rmm_wrap_alloc 768 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes + rmm_wrap_alloc 256 bytes +nthrd: 32, ntasks: 8 +Printing nanobuckets +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +Done. 
+Printing blockbucket +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +Done. +GB_jit_AxB_phase2 +#include "GB_jit_AxB_phase2.cuh" + jit_cache get program GB_jit_AxB_phase2 +found memory-cached prog GB_jit_AxB_phase2 + got kernel instance AxB_phase2 +found memory-cached prog AxB_phase2 +Launching _Z10AxB_phase2PxS_i<<<1,32,0,0>>>(long*,long*,int) +s_0: 1, s_1=1, s_10=1, s_11=1 +GB_jit_AxB_phase2end +#include "GB_jit_AxB_phase2end.cuh" + jit_cache get program GB_jit_AxB_phase2end +found memory-cached prog GB_jit_AxB_phase2end + got kernel instance AxB_phase2end +found memory-cached prog AxB_phase2end +Launching _Z13AxB_phase2endPxPKxS1_S_S1_P16GB_Matrix_opaquex<<<8,32,0,0>>>(long*,long*,long*,long*,long*,GB_Matrix_opaque*,long) +Printing bucketp +0, 2, 4, 4294967302, 4294967303, 8589934600, 12884901897, 17179869194, 17179869195, 21474836492, 21474836493, 25769803791, +Done. 
+Printing bucket +1, 656, 816, 848, 880, 1, 4294967296, 0, 0, 4294967296, 1, 1, 4294967297, 4294967296, 4294967296, 4294967296, 1, 4294967297, 0, 0, 0, 0, 0, 1, 4294967296, 0, 1, 1, 0, 4294967297, 4294967296, 0, 0, 4294967296, 4294967296, 0, 1, 1, 0, 4294967296, 0, 4294967296, 1, 1, 1, 1, 4294967296, 4294967297, 0, 4294967297, 4294967296, 0, 0, 0, 4294967297, 1, 1, 4294967297, 4294967296, 0, 4294967296, 0, 0, 4294967296, 4294967297, 4294967296, 1, 0, 0, 4294967297, 4294967296, 4294967296, 4294967297, 0, 0, 4294967297, 0, 1, 4294967297, 4294967297, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 1, 4294967297, 4294967296, 4294967297, 0, 4294967296, 4294967296, 4294967296, 0, 4294967296, 4294967296, 4294967297, 4294967297, 4294967296, 1, 4294967296, 4294967296, 4294967296, 4294967296, 0, 4294967297, 1, 4294967296, 0, 4294967296, 4294967296, 4294967297, 0, 1, 4294967297, 0, 0, 0, 4294967296, 4294967297, 4294967297, 0, 4294967296, 4294967297, 1, 0, 0, 0, 0, 1, 4294967297, 1, 4294967296, 0, 4294967296, 4294967296, 4294967296, 1, 4294967297, 1, 4294967296, 4294967296, 0, 4294967297, 4294967297, 1, 0, 4294967296, 0, 1, 0, 1, 0, 0, 0, 4294967297, 0, 1, 1, 4294967296, 1, 4294967297, 4294967297, 1, 4294967296, 4294967297, 4294967297, 1, 0, 0, 4294967297, 4294967296, 0, 4294967297, 4294967297, 4294967297, 4294967296, 4294967296, 0, 0, 1, 0, 0, 1, 4294967296, 1, 4294967296, 4294967296, 1, 4294967297, 1, 4294967297, 1, 4294967296, 4294967297, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 4294967296, 1, 4294967297, 4294967297, 0, 4294967297, 4294967296, 0, 0, 1, 4294967296, 0, 4294967297, 4294967297, 1, 0, 4294967296, 0, 4294967296, 0, 1, 4294967296, 1, 4294967297, 1, 0, 1, 4294967296, 1, 4294967296, 1, 4294967297, 4294967296, 0, 4294967296, 4294967296, 0, 1, 0, 4294967297, 4294967297, 1, 4294967297, 4294967296, 1, 0, 4294967297, 4294967296, 0, 0, 4294967296, 0, 0, 4294967297, 1, 0, 4294967296, 4294967297, 1, 
4294967297, 4294967297, 0, 4294967296, 1, 4294967297, 4294967297, 4294967297, 4294967296, 0, 4294967297, 4294967297, 1, 0, 4294967297, 4294967297, 0, 4294967296, 0, 0, 4294967296, 1, 4294967297, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 4294967297, 4294967297, 1, 0, 4294967296, 1, 4294967297, 1, 0, 4294967297, 1, 0, 4294967297, 4294967297, 4294967297, 1, 1, 0, 4294967296, 4294967297, 4294967296, 4294967297, 0, 4294967297, 1, 0, 1, 1, 1, 4294967297, 4294967297, 1, 4294967296, 4294967297, 1, 1, 4294967297, 4294967296, 4294967296, 4294967296, 4294967297, 4294967296, 0, 0, 4294967296, 0, 1, 1, 4294967297, 4294967297, 4294967297, 1, 1, 0, 1, 1, 4294967296, 4294967296, 0, 0, 0, 4294967297, 4294967296, 0, 4294967297, 4294967296, 4294967297, 0, 4294967297, 1, 4294967296, 1, 1, 1, 4294967296, 4294967297, 1, 0, 4294967297, 0, 0, 4294967297, 0, 4294967296, 1, 4294967297, 4294967296, 0, 4294967296, 4294967297, 0, 1, 4294967296, 4294967297, 1, 4294967296, 0, 4294967296, 0, 4294967296, 4294967296, 0, 0, 0, 0, 0, 4294967297, 4294967296, 4294967296, 4294967297, 0, 4294967297, 4294967296, 0, 1, 1, 4294967297, 1, 4294967296, 1, 0, 4294967297, 1, 1, 4294967296, 0, 0, 4294967296, 4294967296, 0, 4294967297, 0, 4294967297, 4294967297, 0, 4294967297, 0, 1, 0, 1, 4294967297, 1, 1, 1, 0, 4294967296, 4294967296, 4294967297, 1, 4294967296, 1, 0, 4294967297, 4294967296, 0, 4294967297, 4294967296, 1, 4294967296, 4294967297, 0, 4294967296, 0, 0, 0, 0, 4294967297, 4294967296, 4294967296, 4294967297, 0, 1, 4294967296, 4294967296, 0, 4294967297, 1, 4294967297, 0, 1, 4294967296, 1, 4294967296, 4294967297, 4294967296, 0, 1, 4294967296, 4294967296, 4294967297, 0, 0, 4294967296, 0, 0, 4294967297, 4294967297, 0, 0, 4294967297, 4294967296, 4294967297, 0, 0, 1, 4294967297, 0, 4294967297, 0, 0, 4294967297, 0, 0, 4294967296, 0, 0, 4294967297, 1, 4294967297, 4294967297, 4294967297, 0, 0, 4294967296, 4294967297, 0, 4294967297, 1, 0, 0, 0, 4294967297, 4294967296, 0, 0, 4294967297, 1, 4294967297, 1, 1, 4294967296, 0, 
4294967296, 0, 1, 0, 4294967297, 1, 4294967296, 0, 4294967296, 4294967297, 4294967297, 4294967297, 0, 1, 1, 4294967296, 4294967296, 0, 4294967296, 0, 4294967296, 4294967297, 4294967296, 4294967297, 4294967297, 1, 4294967297, 4294967296, 0, 0, 4294967297, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 4294967296, 0, 1, 4294967296, 4294967296, 1, 4294967297, 4294967296, 0, 0, 4294967297, 1, 1, 4294967297, 4294967296, 1, 0, 1, 4294967296, 4294967297, 4294967297, 4294967297, 1, 1, 0, 4294967297, 4294967296, 0, 4294967296, 1, 1, 0, 4294967297, 4294967297, 0, 4294967296, 4294967297, 0, 4294967297, 4294967297, 4294967297, 4294967296, 0, 0, 4294967297, 0, 4294967296, 4294967297, 4294967296, 0, 4294967297, 0, 4294967297, 4294967296, 4294967297, 4294967296, 0, 1, 4294967297, 4294967296, 4294967297, 0, 4294967297, 4294967296, 1, 4294967297, 4294967296, 0, 1, 0, 4294967297, 1, 4294967296, 0, 4294967296, 4294967297, 1, 4294967297, 0, 1, 4294967296, 4294967296, 4294967296, 4294967297, 4294967297, 4294967297, 4294967296, 4294967296, 4294967297, 4294967297, 1, 0, 1, 0, 4294967297, 0, 1, 0, 4294967296, 4294967296, 4294967296, 4294967296, 0, 1, 1, 4294967297, 1, 1, 4294967297, 1, 4294967296, 4294967297, 4294967297, 4294967296, 1, 4294967297, 4294967296, 1, 1, 1, 1, 1, 0, 4294967296, 4294967297, 1, 0, 1, 4294967297, 4294967296, 0, 0, 4294967297, 4294967297, 1, 4294967297, 1, 1, 4294967297, 1, 4294967297, 0, 1, 1, 4294967297, 1, 4294967297, 4294967297, 4294967297, 4294967297, 4294967296, 0, 0, 4294967297, 0, 4294967296, 0, 0, 0, 4294967297, 1, 4294967297, 0, 4294967296, 0, 0, 4294967296, 1, 0, 4294967296, 0, 0, 4294967296, 4294967296, 1, 4294967296, 4294967297, 0, 4294967297, 0, 0, 0, 0, 1, 1, 4294967296, 4294967297, 4294967296, 4294967297, 1, 4294967296, 4294967297, 4294967296, 0, 4294967297, 0, 0, 0, 0, 0, 1, 4294967296, 4294967296, 4294967297, 4294967296, 4294967296, 4294967296, 1, 4294967296, 4294967296, 1, 1, 0, 0, 4294967297, 4294967296, 4294967296, 4294967297, 1, 
0, 4294967297, 4294967296, 0, 4294967296, 4294967296, 1, 0, 4294967296, 4294967296, 4294967297, 0, 0, 4294967296, 1, 4294967297, 0, 4294967296, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 0, 1, 4294967297, 1, 1, 0, 0, 1, 0, 1, 1, 4294967296, 0, 1, 0, 1, 4294967297, 1, 1, 4294967296, 1, 0, 4294967297, 0, 0, 4294967296, 1, 4294967297, 0, 4294967296, 0, 0, 4294967297, 4294967296, 4294967297, 1, 1, 4294967296, 4294967297, 4294967297, 4294967296, 4294967296, 4294967296, 1, 0, 4294967296, 4294967297, 0, 1, 4294967296, 4294967296, 1, 0, 4294967296, 0, 1, 1, 4294967297, 4294967296, 4294967297, 4294967296, 4294967296, 1, 4294967297, 0, 4294967296, 0, 0, 4294967297, 4294967296, 1, 0, 0, 1, 4294967296, 4294967297, 4294967297, 1, 1, 4294967296, 0, 4294967297, 0, 0, 0, 1, 0, 4294967297, 4294967296, 1, 0, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 0, 1, 1, 0, 0, 4294967296, 4294967297, 1, 1, 0, 1, 0, 4294967296, 4294967297, 4294967297, 0, 0, 1, 1, 1, 4294967296, 0, 1, 4294967297, 4294967296, 1, 4294967296, 1, 4294967296, 4294967297, 1, 1, 1, 0, 1, 1, 4294967297, 4294967297, 1, 4294967297, 0, 1, 4294967296, 0, 4294967297, 0, 4294967297, 4294967296, 4294967297, 4294967297, 1, 4294967297, 4294967296, 4294967296, 1, 4294967296, 4294967296, 4294967296, 4294967296, 4294967296, 0, 0, 0, 1, 0, 0, 1, 0, 0, 4294967297, 1, 4294967296, 4294967296, 4294967296, 1, 1, 4294967297, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 0, 0, 0, 0, 4294967297, 1, 1, 0, 4294967296, 4294967296, 1, 0, 4294967297, 0, 1, 4294967297, 1, 0, 4294967296, 4294967297, 4294967297, 4294967297, 4294967296, 4294967296, 0, 1, +Done. 
+phase2 kernel done ================== + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes +[ OK ] AxB_dot3_tests_PLUS_TIMES_2.smallxsmallPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t (226 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_2.tinyxtinyPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t +found device 0 +inside fill, using seed 12345 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 54321 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 5, no filling +done assigning buckets + rmm_wrap_alloc 3072 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +nthrd: 32, ntasks: 1 +Printing nanobuckets +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +Done. +Printing blockbucket +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +Done. 
+GB_jit_AxB_phase2 +#include "GB_jit_AxB_phase2.cuh" + jit_cache get program GB_jit_AxB_phase2 +found memory-cached prog GB_jit_AxB_phase2 + got kernel instance AxB_phase2 +found memory-cached prog AxB_phase2 +Launching _Z10AxB_phase2PxS_i<<<1,32,0,0>>>(long*,long*,int) +s_0: 1, s_1=1, s_10=1, s_11=1 +GB_jit_AxB_phase2end +#include "GB_jit_AxB_phase2end.cuh" + jit_cache get program GB_jit_AxB_phase2end +found memory-cached prog GB_jit_AxB_phase2end + got kernel instance AxB_phase2end +found memory-cached prog AxB_phase2end +Launching _Z13AxB_phase2endPxPKxS1_S_S1_P16GB_Matrix_opaquex<<<1,32,0,0>>>(long*,long*,long*,long*,long*,GB_Matrix_opaque*,long) +Printing bucketp +0, 1, 2, 4294967299, 4294967300, 4294967302, 4294967303, 8589934600, 8589934601, 8589934603, 8589934605, 8589934606, +Done. +Printing bucket +0, 16, 4294967296, 1, 1, 1, 0, 0, 4294967297, 1, 0, 4294967296, 0, 4294967296, 4294967296, 4294967297, 1, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 0, 0, 4294967297, 4294967297, 4294967296, 1, 0, 1, 4294967296, 4294967296, +Done. 
+phase2 kernel done ================== +[ OK ] AxB_dot3_tests_PLUS_TIMES_2.tinyxtinyPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t (2 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_2.smallxsmallPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t +found device 0 +inside fill, using seed 12345 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 1024 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +inside fill, using seed 54321 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling 
sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 4096 bytes +1024 slots to fill +all pairs to bucket 5, no filling +done assigning buckets + rmm_wrap_alloc 24576 bytes + rmm_wrap_alloc 768 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes + rmm_wrap_alloc 256 bytes +nthrd: 32, ntasks: 8 +Printing nanobuckets +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +Done. 
+Printing blockbucket +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +Done. +GB_jit_AxB_phase2 +#include "GB_jit_AxB_phase2.cuh" + jit_cache get program GB_jit_AxB_phase2 +found memory-cached prog GB_jit_AxB_phase2 + got kernel instance AxB_phase2 +found memory-cached prog AxB_phase2 +Launching _Z10AxB_phase2PxS_i<<<1,32,0,0>>>(long*,long*,int) +s_0: 1, s_1=1, s_10=1, s_11=1 +GB_jit_AxB_phase2end +#include "GB_jit_AxB_phase2end.cuh" + jit_cache get program GB_jit_AxB_phase2end +found memory-cached prog GB_jit_AxB_phase2end + got kernel instance AxB_phase2end +found memory-cached prog AxB_phase2end +Launching _Z13AxB_phase2endPxPKxS1_S_S1_P16GB_Matrix_opaquex<<<8,32,0,0>>>(long*,long*,long*,long*,long*,GB_Matrix_opaque*,long) +Printing bucketp +0, 2, 4, 4294967302, 4294967303, 8589934600, 12884901897, 17179869194, 17179869195, 21474836492, 21474836493, 25769803791, +Done. 
+Printing bucket +1, 16, 816, 848, 880, 1, 4294967296, 0, 0, 4294967296, 1, 1, 4294967297, 4294967296, 4294967296, 4294967296, 1, 4294967297, 0, 0, 0, 0, 0, 1, 4294967296, 0, 1, 1, 0, 4294967297, 4294967296, 0, 0, 4294967296, 4294967296, 0, 1, 1, 0, 4294967296, 0, 4294967296, 1, 1, 1, 1, 4294967296, 4294967297, 0, 4294967297, 4294967296, 0, 0, 0, 4294967297, 1, 1, 4294967297, 4294967296, 0, 4294967296, 0, 0, 4294967296, 4294967297, 4294967296, 1, 0, 0, 4294967297, 4294967296, 4294967296, 4294967297, 0, 0, 4294967297, 0, 1, 4294967297, 4294967297, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 1, 4294967297, 4294967296, 4294967297, 0, 4294967296, 4294967296, 4294967296, 0, 4294967296, 4294967296, 4294967297, 4294967297, 4294967296, 1, 4294967296, 4294967296, 4294967296, 4294967296, 0, 4294967297, 1, 4294967296, 0, 4294967296, 4294967296, 4294967297, 0, 1, 4294967297, 0, 0, 0, 4294967296, 4294967297, 4294967297, 0, 4294967296, 4294967297, 1, 0, 0, 0, 0, 1, 4294967297, 1, 4294967296, 0, 4294967296, 4294967296, 4294967296, 1, 4294967297, 1, 4294967296, 4294967296, 0, 4294967297, 4294967297, 1, 0, 4294967296, 0, 1, 0, 1, 0, 0, 0, 4294967297, 0, 1, 1, 4294967296, 1, 4294967297, 4294967297, 1, 4294967296, 4294967297, 4294967297, 1, 0, 0, 4294967297, 4294967296, 0, 4294967297, 4294967297, 4294967297, 4294967296, 4294967296, 0, 0, 1, 0, 0, 1, 4294967296, 1, 4294967296, 4294967296, 1, 4294967297, 1, 4294967297, 1, 4294967296, 4294967297, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 4294967296, 1, 4294967297, 4294967297, 0, 4294967297, 4294967296, 0, 0, 1, 4294967296, 0, 4294967297, 4294967297, 1, 0, 4294967296, 0, 4294967296, 0, 1, 4294967296, 1, 4294967297, 1, 0, 1, 4294967296, 1, 4294967296, 1, 4294967297, 4294967296, 0, 4294967296, 4294967296, 0, 1, 0, 4294967297, 4294967297, 1, 4294967297, 4294967296, 1, 0, 4294967297, 4294967296, 0, 0, 4294967296, 0, 0, 4294967297, 1, 0, 4294967296, 4294967297, 1, 
4294967297, 4294967297, 0, 4294967296, 1, 4294967297, 4294967297, 4294967297, 4294967296, 0, 4294967297, 4294967297, 1, 0, 4294967297, 4294967297, 0, 4294967296, 0, 0, 4294967296, 1, 4294967297, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 4294967297, 4294967297, 1, 0, 4294967296, 1, 4294967297, 1, 0, 4294967297, 1, 0, 4294967297, 4294967297, 4294967297, 1, 1, 0, 4294967296, 4294967297, 4294967296, 4294967297, 0, 4294967297, 1, 0, 1, 1, 1, 4294967297, 4294967297, 1, 4294967296, 4294967297, 1, 1, 4294967297, 4294967296, 4294967296, 4294967296, 4294967297, 4294967296, 0, 0, 4294967296, 0, 1, 1, 4294967297, 4294967297, 4294967297, 1, 1, 0, 1, 1, 4294967296, 4294967296, 0, 0, 0, 4294967297, 4294967296, 0, 4294967297, 4294967296, 4294967297, 0, 4294967297, 1, 4294967296, 1, 1, 1, 4294967296, 4294967297, 1, 0, 4294967297, 0, 0, 4294967297, 0, 4294967296, 1, 4294967297, 4294967296, 0, 4294967296, 4294967297, 0, 1, 4294967296, 4294967297, 1, 4294967296, 0, 4294967296, 0, 4294967296, 4294967296, 0, 0, 0, 0, 0, 4294967297, 4294967296, 4294967296, 4294967297, 0, 4294967297, 4294967296, 0, 1, 1, 4294967297, 1, 4294967296, 1, 0, 4294967297, 1, 1, 4294967296, 0, 0, 4294967296, 4294967296, 0, 4294967297, 0, 4294967297, 4294967297, 0, 4294967297, 0, 1, 0, 1, 4294967297, 1, 1, 1, 0, 4294967296, 4294967296, 4294967297, 1, 4294967296, 1, 0, 4294967297, 4294967296, 0, 4294967297, 4294967296, 1, 4294967296, 4294967297, 0, 4294967296, 0, 0, 0, 0, 4294967297, 4294967296, 4294967296, 4294967297, 0, 1, 4294967296, 4294967296, 0, 4294967297, 1, 4294967297, 0, 1, 4294967296, 1, 4294967296, 4294967297, 4294967296, 0, 1, 4294967296, 4294967296, 4294967297, 0, 0, 4294967296, 0, 0, 4294967297, 4294967297, 0, 0, 4294967297, 4294967296, 4294967297, 0, 0, 1, 4294967297, 0, 4294967297, 0, 0, 4294967297, 0, 0, 4294967296, 0, 0, 4294967297, 1, 4294967297, 4294967297, 4294967297, 0, 0, 4294967296, 4294967297, 0, 4294967297, 1, 0, 0, 0, 4294967297, 4294967296, 0, 0, 4294967297, 1, 4294967297, 1, 1, 4294967296, 0, 
4294967296, 0, 1, 0, 4294967297, 1, 4294967296, 0, 4294967296, 4294967297, 4294967297, 4294967297, 0, 1, 1, 4294967296, 4294967296, 0, 4294967296, 0, 4294967296, 4294967297, 4294967296, 4294967297, 4294967297, 1, 4294967297, 4294967296, 0, 0, 4294967297, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 4294967296, 0, 1, 4294967296, 4294967296, 1, 4294967297, 4294967296, 0, 0, 4294967297, 1, 1, 4294967297, 4294967296, 1, 0, 1, 4294967296, 4294967297, 4294967297, 4294967297, 1, 1, 0, 4294967297, 4294967296, 0, 4294967296, 1, 1, 0, 4294967297, 4294967297, 0, 4294967296, 4294967297, 0, 4294967297, 4294967297, 4294967297, 4294967296, 0, 0, 4294967297, 0, 4294967296, 4294967297, 4294967296, 0, 4294967297, 0, 4294967297, 4294967296, 4294967297, 4294967296, 0, 1, 4294967297, 4294967296, 4294967297, 0, 4294967297, 4294967296, 1, 4294967297, 4294967296, 0, 1, 0, 4294967297, 1, 4294967296, 0, 4294967296, 4294967297, 1, 4294967297, 0, 1, 4294967296, 4294967296, 4294967296, 4294967297, 4294967297, 4294967297, 4294967296, 4294967296, 4294967297, 4294967297, 1, 0, 1, 0, 4294967297, 0, 1, 0, 4294967296, 4294967296, 4294967296, 4294967296, 0, 1, 1, 4294967297, 1, 1, 4294967297, 1, 4294967296, 4294967297, 4294967297, 4294967296, 1, 4294967297, 4294967296, 1, 1, 1, 1, 1, 0, 4294967296, 4294967297, 1, 0, 1, 4294967297, 4294967296, 0, 0, 4294967297, 4294967297, 1, 4294967297, 1, 1, 4294967297, 1, 4294967297, 0, 1, 1, 4294967297, 1, 4294967297, 4294967297, 4294967297, 4294967297, 4294967296, 0, 0, 4294967297, 0, 4294967296, 0, 0, 0, 4294967297, 1, 4294967297, 0, 4294967296, 0, 0, 4294967296, 1, 0, 4294967296, 0, 0, 4294967296, 4294967296, 1, 4294967296, 4294967297, 0, 4294967297, 0, 0, 0, 0, 1, 1, 4294967296, 4294967297, 4294967296, 4294967297, 1, 4294967296, 4294967297, 4294967296, 0, 4294967297, 0, 0, 0, 0, 0, 1, 4294967296, 4294967296, 4294967297, 4294967296, 4294967296, 4294967296, 1, 4294967296, 4294967296, 1, 1, 0, 0, 4294967297, 4294967296, 4294967296, 4294967297, 1, 
0, 4294967297, 4294967296, 0, 4294967296, 4294967296, 1, 0, 4294967296, 4294967296, 4294967297, 0, 0, 4294967296, 1, 4294967297, 0, 4294967296, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 0, 1, 4294967297, 1, 1, 0, 0, 1, 0, 1, 1, 4294967296, 0, 1, 0, 1, 4294967297, 1, 1, 4294967296, 1, 0, 4294967297, 0, 0, 4294967296, 1, 4294967297, 0, 4294967296, 0, 0, 4294967297, 4294967296, 4294967297, 1, 1, 4294967296, 4294967297, 4294967297, 4294967296, 4294967296, 4294967296, 1, 0, 4294967296, 4294967297, 0, 1, 4294967296, 4294967296, 1, 0, 4294967296, 0, 1, 1, 4294967297, 4294967296, 4294967297, 4294967296, 4294967296, 1, 4294967297, 0, 4294967296, 0, 0, 4294967297, 4294967296, 1, 0, 0, 1, 4294967296, 4294967297, 4294967297, 1, 1, 4294967296, 0, 4294967297, 0, 0, 0, 1, 0, 4294967297, 4294967296, 1, 0, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 0, 1, 1, 0, 0, 4294967296, 4294967297, 1, 1, 0, 1, 0, 4294967296, 4294967297, 4294967297, 0, 0, 1, 1, 1, 4294967296, 0, 1, 4294967297, 4294967296, 1, 4294967296, 1, 4294967296, 4294967297, 1, 1, 1, 0, 1, 1, 4294967297, 4294967297, 1, 4294967297, 0, 1, 4294967296, 0, 4294967297, 0, 4294967297, 4294967296, 4294967297, 4294967297, 1, 4294967297, 4294967296, 4294967296, 1, 4294967296, 4294967296, 4294967296, 4294967296, 4294967296, 0, 0, 0, 1, 0, 0, 1, 0, 0, 4294967297, 1, 4294967296, 4294967296, 4294967296, 1, 1, 4294967297, 4294967297, 4294967296, 4294967296, 4294967297, 4294967296, 4294967297, 4294967296, 0, 0, 0, 0, 4294967297, 1, 1, 0, 4294967296, 4294967296, 1, 0, 4294967297, 0, 1, 4294967297, 1, 0, 4294967296, 4294967297, 4294967297, 4294967297, 4294967296, 4294967296, 0, 1, +Done. 
+phase2 kernel done ================== + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes +[ OK ] AxB_dot3_tests_PLUS_TIMES_2.smallxsmallPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t (229 ms) +[----------] 4 tests from AxB_dot3_tests_PLUS_TIMES_2 (475 ms total) + +[----------] 4 tests from AxB_dot3_tests_PLUS_TIMES_3 +[ RUN ] AxB_dot3_tests_PLUS_TIMES_3.tinyxtinyPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t +Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 512 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes +32 slots to fill +all pairs to bucket 1, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff50e0200 +inside enumify: 0x7f1ff50e0200 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 
397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 1, no filling +done assigning buckets +bucket 1 has 32 dots to do +LAUNCHING BUCKET CODE: 1 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_dndn +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_dot3_phase3_dndn + got kernel instance AxB_dot3_phase3_dndn_int32_t_int32_t_int32_t +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_dot3_phase3_dndn_int32_t_int32_t_int32_t +--------------------------------------- +--- Linker for void AxB_dot3_phase3_dndn(long long, long long, long long*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, int) --- +--------------------------------------- +info : 40 bytes gmem +info : Function properties for '_Z20AxB_dot3_phase3_dndnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i': +info : used 98 registers, 320 stack, 0 bytes smem, 412 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z20AxB_dot3_phase3_dndnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +tid=0, i,j = 6,0 nnzA= 32, nnzB=32 +tid=0, i,j = 1,1 nnzA= 32, nnzB=32 +tid=0, i,j = 12,3 nnzA= 32, nnzB=32 +tid=0, i,j = 17,3 nnzA= 32, nnzB=32 +tid=0, i,j = 19,4 nnzA= 32, nnzB=32 +tid=0, i,j = 19,5 nnzA= 32, nnzB=32 +tid=0, i,j = 22,6 nnzA= 32, nnzB=32 +tid=0, i,j = 24,6 nnzA= 32, nnzB=32 +tid=0, 
i,j = 10,8 nnzA= 32, nnzB=32 +tid=0, i,j = 19,9 nnzA= 32, nnzB=32 +tid=0, i,j = 31,9 nnzA= 32, nnzB=32 +tid=0, i,j = 13,11 nnzA= 32, nnzB=32 +tid=0, i,j = 11,12 nnzA= 32, nnzB=32 +tid=0, i,j = 24,14 nnzA= 32, nnzB=32 +tid=0, i,j = 30,15 nnzA= 32, nnzB=32 +tid=0, i,j = 20,16 nnzA= 32, nnzB=32 +tid=0, i,j = 30,17 nnzA= 32, nnzB=32 +tid=0, i,j = 18,18 nnzA= 32, nnzB=32 +tid=0, i,j = 1,19 nnzA= 32, nnzB=32 +tid=0, i,j = 25,20 nnzA= 32, nnzB=32 +tid=0, i,j = 24,21 nnzA= 32, nnzB=32 +tid=0, i,j = 27,21 nnzA= 32, nnzB=32 +tid=0, i,j = 30,22 nnzA= 32, nnzB=32 +tid=0, i,j = 30,23 nnzA= 32, nnzB=32 +tid=0, i,j = 14,24 nnzA= 32, nnzB=32 +tid=0, i,j = 4,25 nnzA= 32, nnzB=32 +tid=0, i,j = 15,26 nnzA= 32, nnzB=32 +tid=0, i,j = 28,27 nnzA= 32, nnzB=32 +tid=0, i,j = 16,28 nnzA= 32, nnzB=32 +tid=0, i,j = 9,29 nnzA= 32, nnzB=32 +tid=0, i,j = 24,30 nnzA= 32, nnzB=32 +tid=0, i,j = 31,31 nnzA= 32, nnzB=32 +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 5.1968ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + + (0,6) 11 + (1,1) 10 + (3,12) 7 + (3,17) 6 + (4,19) 8 + (5,19) 10 + (6,22) 6 + (6,24) 9 + (8,10) 7 + (9,19) 8 + (9,31) 6 + (11,13) 8 + (12,11) 6 + (14,24) 10 + (15,30) 9 + (16,20) 5 + (17,30) 7 + (18,18) 12 + (19,1) 6 + (20,25) 7 + (21,24) 9 + (21,27) 6 + (22,30) 8 + (23,30) 11 + (24,14) 7 + (25,4) 9 + (26,15) 4 + (27,28) 5 + (28,16) 4 + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + + (0,6) 11 + (1,1) 10 + (3,12) 7 + (3,17) 6 + (4,19) 8 + (5,19) 10 + (6,22) 6 + (6,24) 9 + (8,10) 7 + (9,19) 8 + (9,31) 6 + (11,13) 8 + (12,11) 6 + (14,24) 10 + (15,30) 9 + (16,20) 5 + (17,30) 7 + (18,18) 12 + (19,1) 6 + (20,25) 7 + (21,24) 9 + (21,27) 6 + (22,30) 8 + (23,30) 11 + (24,14) 7 + (25,4) 9 + (26,15) 4 + (27,28) 5 + (28,16) 4 + (29,9) 7 + (30,24) 10 + (31,31) 10 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 32 entries, memory: 1.5 KB + + (0,6) 11 + (1,1) 10 + (3,12) 7 + (3,17) 6 + (4,19) 8 + (5,19) 10 + (6,22) 6 + (6,24) 9 + (8,10) 7 + (9,19) 8 + (9,31) 6 + (11,13) 8 + (12,11) 6 + (14,24) 10 + (15,30) 9 + (16,20) 5 + (17,30) 7 + (18,18) 12 + (19,1) 6 + (20,25) 7 + (21,24) 9 + (21,27) 6 + (22,30) 8 + (23,30) 11 + (24,14) 7 + (25,4) 9 + (26,15) 4 + (27,28) 5 + (28,16) 4 + (29,9) 7 + (30,24) 10 + (31,31) 10 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 32 entries, memory: 1.2 KB + + (0,6) 0 + (1,1) 0 + (3,12) 0 + (3,17) 0 + (4,19) 0 + (5,19) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 0 + (14,24) 0 + 
(15,30) 0 + (16,20) 0 + (17,30) 0 + (18,18) 0 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 0 + (24,14) 0 + (25,4) 0 + (26,15) 0 + (27,28) 0 + (28,16) 0 + (29,9) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + work:32 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 160 values, invsparse = 7 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +160 nonzeroes left to fill.. +62 nonzeroes left to fill.. 
+ rmm_wrap_alloc 2048 bytes + rmm_wrap_alloc 1024 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 5, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1fea03f000 +inside enumify: 0x7f1fea03f000 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 5, no filling +done assigning buckets +bucket 5 has 32 dots to do +LAUNCHING BUCKET CODE: 5 +Confiring spdnINside get cached file +looking for prog in file 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_spdn +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_dot3_phase3_spdn + got kernel instance AxB_dot3_phase3_spdn_int32_t_int32_t_int32_t +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_dot3_phase3_spdn_int32_t_int32_t_int32_t +--------------------------------------- +--- Linker for void AxB_dot3_phase3_spdn(long long, long long, long long*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, int) --- +--------------------------------------- +info : 0 bytes gmem +info : Function properties for '_Z20AxB_dot3_phase3_spdnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i': +info : used 112 registers, 296 stack, 0 bytes smem, 412 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z20AxB_dot3_phase3_spdnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 2.00294ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 6 + + (0,6) zombie + (1,1) 2 + (3,12) 2 + (3,17) 2 + (4,19) zombie + (5,19) zombie + (6,22) 2 + (6,24) 0 + (8,10) 0 + (9,19) zombie + (9,31) 4 + (11,13) 3 + (12,11) 2 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) zombie + (19,1) 3 + (20,25) zombie + (21,24) 1 + (21,27) 0 + (22,30) 1 + (23,30) 0 + (24,14) 1 + (25,4) 0 + (26,15) 1 + (27,28) 2 + (28,16) 1 + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 26 entries, memory: 1.1 KB + + (1,1) 2 + (3,12) 2 + (3,17) 2 + (6,22) 2 + (6,24) 0 + (8,10) 0 + (9,31) 4 + (11,13) 3 + (12,11) 2 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (19,1) 3 + (21,24) 1 + (21,27) 0 + (22,30) 1 + (23,30) 0 + (24,14) 1 + (25,4) 0 + (26,15) 1 + (27,28) 2 + (28,16) 1 + (29,9) 0 + (30,24) 1 + (31,31) 2 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 26 entries, memory: 1.1 KB + + (1,1) 2 + (3,12) 2 + (3,17) 2 + (6,22) 2 + (6,24) 0 + (8,10) 0 + (9,31) 4 + (11,13) 3 + (12,11) 2 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (19,1) 3 + (21,24) 1 + (21,27) 0 + (22,30) 1 + (23,30) 0 + (24,14) 1 + (25,4) 0 + (26,15) 1 + (27,28) 2 + (28,16) 1 + (29,9) 0 + (30,24) 1 + (31,31) 2 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 26 entries, memory: 1.2 KB + + (1,1) 0 + (3,12) 0 + (3,17) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,31) 0 + (11,13) 0 + (12,11) 0 + (14,24) 0 + (15,30) 0 + (16,20) 0 + (17,30) 0 + (19,1) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 0 + (24,14) 0 + (25,4) 0 + (26,15) 0 + (27,28) 0 + (28,16) 0 + (29,9) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 26 
entries, memory: 1.0 KB + + (1,1) 1 + (3,12) 1 + (3,17) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (19,1) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + (29,9) 1 + (30,24) 1 + (31,31) 1 + work:26 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 320 values, invsparse = 4 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +320 nonzeroes left to fill.. +140 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 6, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff52fd100 +inside enumify: 0x7f1ff52fd100 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 6, no filling +done assigning buckets +bucket 6 has 32 dots to do +LAUNCHING BUCKET CODE: 6 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vssp +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_dot3_phase3_vssp + got kernel instance AxB_dot3_phase3_vssp_int32_t_int32_t_int32_t +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_dot3_phase3_vssp_int32_t_int32_t_int32_t +--------------------------------------- +--- Linker for void AxB_dot3_phase3_vssp(long long, long long, long long*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, int) --- +--------------------------------------- +info : 0 bytes gmem +info : Function properties for '_Z20AxB_dot3_phase3_vsspIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i': +info : used 215 registers, 296 stack, 0 bytes smem, 412 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z20AxB_dot3_phase3_vsspIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 2.62758ms + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 15 + + (0,6) zombie + (1,1) 1 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,19) zombie + (9,31) 1 + (11,13) zombie + (12,11) 0 + (14,24) 1 + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 1 + (20,25) zombie + (21,24) zombie + (21,27) 1 + (22,30) 0 + (23,30) 0 + (24,14) zombie + (25,4) zombie + (26,15) 0 + (27,28) zombie + (28,16) 0 + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 17 entries, memory: 1.1 KB + + (1,1) 1 + (3,17) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,31) 1 + (12,11) 0 + (14,24) 1 + (15,30) 1 + (19,1) 1 + (21,27) 1 + (22,30) 0 + (23,30) 0 + (26,15) 0 + (28,16) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 17 entries, memory: 1.1 KB + + (1,1) 1 + (3,17) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,31) 1 + (12,11) 0 + (14,24) 1 + (15,30) 1 + (19,1) 1 + (21,27) 1 + (22,30) 0 + (23,30) 0 + (26,15) 0 + (28,16) 0 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 17 entries, memory: 1.2 KB + + (1,1) 0 + (3,17) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,31) 0 + (12,11) 0 + (14,24) 0 + (15,30) 0 + (19,1) 0 + (21,27) 0 + (22,30) 0 + (23,30) 0 + (26,15) 0 + (28,16) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 17 entries, memory: 1.0 KB + + (1,1) 1 + (3,17) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,31) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (19,1) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (26,15) 1 + (28,16) 1 + (30,24) 1 + (31,31) 1 + work:17 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random 
nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 128 values, invsparse = 8 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +128 nonzeroes left to fill.. +43 nonzeroes left to fill.. 
+ rmm_wrap_alloc 1024 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 7, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff50e0300 +inside enumify: 0x7f1ff50e0300 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 7, no filling +done assigning buckets +bucket 7 has 32 dots to do +LAUNCHING BUCKET CODE: 7 +INside get cached file +looking for prog in file 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +--------------------------------------- +--- Linker for void AxB_dot3_phase3_vsvs(long long, long long, long long*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, int) --- +--------------------------------------- +info : 0 bytes gmem +info : Function properties for '_Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i': +info : used 88 registers, 336 stack, 128 bytes smem, 412 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. 
+returned from kernel 2.37363ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 24 + + (0,6) zombie + (1,1) 0 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) zombie + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 0 + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... 
+ rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 8 entries, memory: 896 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 0 + (19,1) 0 + (25,4) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 8 entries, memory: 840 bytes + + (1,1) 1 + (3,17) 1 + (9,31) 1 + (15,30) 1 + (19,1) 1 + (25,4) 1 + (30,24) 1 + (31,31) 1 + work:8 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. 
+ rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 128 values, invsparse = 8 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +128 nonzeroes left to fill.. +43 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 8, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff52ff000 +inside enumify: 0x7f1ff52ff000 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + 
rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 8, no filling +done assigning buckets +bucket 8 has 32 dots to do +LAUNCHING BUCKET CODE: 8 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.387072ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 24 + + (0,6) zombie + (1,1) 0 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) zombie + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 0 + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 8 entries, memory: 896 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 0 + (19,1) 0 + (25,4) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 8 entries, memory: 840 bytes + + (1,1) 1 + (3,17) 1 + (9,31) 1 + (15,30) 1 + (19,1) 1 + (25,4) 1 + (30,24) 1 + (31,31) 1 + work:8 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 128 values, invsparse = 8 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +128 nonzeroes left to fill.. +43 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 9, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5300900 +inside enumify: 0x7f1ff5300900 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype 
+btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 9, no filling +done assigning buckets +bucket 9 has 32 dots to do +LAUNCHING BUCKET CODE: 9 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. 
+returned from kernel 0.468992ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 24 + + (0,6) zombie + (1,1) 0 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) zombie + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 0 + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... 
+ rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 8 entries, memory: 896 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 0 + (19,1) 0 + (25,4) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 8 entries, memory: 840 bytes + + (1,1) 1 + (3,17) 1 + (9,31) 1 + (15,30) 1 + (19,1) 1 + (25,4) 1 + (30,24) 1 + (31,31) 1 + work:8 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 128 values, invsparse = 8 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +128 nonzeroes left to fill.. +43 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 10, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5301f00 +inside enumify: 0x7f1ff5301f00 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 10, no filling +done assigning buckets +bucket 10 has 32 dots to do +LAUNCHING BUCKET CODE: 10 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.418816ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 24 + + (0,6) zombie + (1,1) 0 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) zombie + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 0 + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 8 entries, memory: 896 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 0 + (19,1) 0 + (25,4) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 8 entries, memory: 840 bytes + + (1,1) 1 + (3,17) 1 + (9,31) 1 + (15,30) 1 + (19,1) 1 + (25,4) 1 + (30,24) 1 + (31,31) 1 + work:8 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 160 values, invsparse = 7 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +160 nonzeroes left to fill.. +51 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +20 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 11, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5306300 +inside enumify: 0x7f1ff5306300 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done 
invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 11, no filling +done assigning buckets +bucket 11 has 32 dots to do +LAUNCHING BUCKET CODE: 11 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_mp + failed to open cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_dot3_phase3_mp +compiling now +--------------------------------------- +--- Source of GB_jit_AxB_dot3_phase3_mp --- +--------------------------------------- + 1 #include "/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h" + 2 #include "GB_jit_AxB_dot3_phase3_mp.cuh" +--------------------------------------- +Found #include GB_jit_AxB_dot3_phase3_mp.cuh from GB_jit_AxB_dot3_phase3_mp:2 [] at: + ../templates/GB_jit_AxB_dot3_phase3_mp.cuh +Found #include limits from GB_jit_AxB_dot3_phase3_mp.cuh:36 [../templates/GB_jit_AxB_dot3_phase3_mp.cuh] at: + __jitify_builtin/limits +Found #include climits from limits:4 [__jitify_builtin/limits] at: + __jitify_builtin/climits +Found #include cfloat from limits:5 [__jitify_builtin/limits] 
at: + __jitify_builtin/cfloat +Found #include cstdint from GB_jit_AxB_dot3_phase3_mp.cuh:37 [../templates/GB_jit_AxB_dot3_phase3_mp.cuh] at: + __jitify_builtin/cstdint +Found #include cooperative_groups.h from GB_jit_AxB_dot3_phase3_mp.cuh:38 [../templates/GB_jit_AxB_dot3_phase3_mp.cuh] at: + /usr/local/cuda/include/cooperative_groups.h +Found #include cooperative_groups/details/info.h from cooperative_groups.h:55 [/usr/local/cuda/include/cooperative_groups.h] at: + /usr/local/cuda/include/cooperative_groups/details/info.h +Found #include cooperative_groups/details/driver_abi.h from cooperative_groups.h:56 [/usr/local/cuda/include/cooperative_groups.h] at: + /usr/local/cuda/include/cooperative_groups/details/driver_abi.h +Found #include cooperative_groups/details/helpers.h from cooperative_groups.h:57 [/usr/local/cuda/include/cooperative_groups.h] at: + /usr/local/cuda/include/cooperative_groups/details/helpers.h +Found #include sync.h from cooperative_groups/details/helpers.h:53 [/usr/local/cuda/include/cooperative_groups/details/helpers.h] at: + /usr/local/cuda/include/cooperative_groups/details/sync.h +Found #include info.h from sync.h:52 [/usr/local/cuda/include/cooperative_groups/details/sync.h] at: + /usr/local/cuda/include/cooperative_groups/details/info.h +Found #include cooperative_groups/details/partitioning.h from cooperative_groups.h:1810 [/usr/local/cuda/include/cooperative_groups.h] at: + /usr/local/cuda/include/cooperative_groups/details/partitioning.h +Found #include matrix.h from GB_jit_AxB_dot3_phase3_mp.cuh:39 [../templates/GB_jit_AxB_dot3_phase3_mp.cuh] at: + ../matrix.h +matrix.h(52): warning: stdbool.h: [jitify] File not found +Found #include stddef.h from matrix.h:53 [../matrix.h] at: + __jitify_builtin/stddef.h +Found #include GB_opaque.h from matrix.h:131 [../matrix.h] at: + ../../Source/GB_opaque.h +Found #include GB_Operator.h from GB_opaque.h:397 [../../Source/GB_opaque.h] at: + ../../Source/Template/GB_Operator.h +Found #include 
GB_matrix.h from GB_opaque.h:495 [../../Source/GB_opaque.h] at: + ../../Source/Template/GB_matrix.h +Found #include GB_imin.h from matrix.h:135 [../matrix.h] at: + ../../Source/GB_imin.h +Found #include GB_zombie.h from matrix.h:136 [../matrix.h] at: + ../../Source/GB_zombie.h +Found #include GB_nnz.h from matrix.h:137 [../matrix.h] at: + ../../Source/GB_nnz.h +Found #include GB_partition.h from matrix.h:138 [../matrix.h] at: + ../../Source/GB_partition.h +Found #include GB_binary_search.h from matrix.h:139 [../matrix.h] at: + ../../Source/GB_binary_search.h +Found #include GB_lookup_template.c from GB_binary_search.h:230 [../../Source/GB_binary_search.h] at: + ../../Source/Template/GB_lookup_template.c +Found #include GB_search_for_vector_template.c from matrix.h:140 [../matrix.h] at: + ../../Source/Template/GB_search_for_vector_template.c +completed func() + compiled serialized prog GB_jit_AxB_dot3_phase3_mp +writing prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_AxB_dot3_phase3_mp + got kernel instance AxB_dot3_phase3_mp_int32_t_int32_t_int32_t + failed to open cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +compiling now +About to instantiate kernel +ABout to compile kernel +done compilling +--------------------------------------- +_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i +--------------------------------------- +--- PTX for 0x7ffcc2488c20 in GB_jit_AxB_dot3_phase3_mp --- +--------------------------------------- +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-30794723 +// Cuda compilation tools, release 11.6, V11.6.55 +// Based on NVVM 7.0.1 +// + +.version 7.6 +.target sm_70, debug +.address_size 64 + + // .weak _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i +.weak .func (.param .b32 func_retval0) _Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0_ +( + .param .align 8 .b8 
_Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0__param_0[16], + .param .b32 _Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0__param_1 +) +; +.extern .func (.param .b32 func_retval0) vprintf +( + .param .b64 vprintf_param_0, + .param .b64 vprintf_param_1 +) +; +.weak .func _ZN18cooperative_groups4__v117thread_group_baseILj4EEC2Ev +( + .param .b64 _ZN18cooperative_groups4__v117thread_group_baseILj4EEC2Ev_param_0 +) +; +.weak .func _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE +( + .param .b64 _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE_param_0, + .param .b64 _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE_param_1 +) +; +.weak .func _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2_ +( + .param .b64 _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2__param_0, + .param .b64 _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2__param_1 +) +; +.func (.param .b64 func_retval0) _Z9atomicAddPyy +( + .param .b64 _Z9atomicAddPyy_param_0, + .param .b64 _Z9atomicAddPyy_param_1 +) +; +.func (.param .b32 func_retval0) _Z13__ballot_syncji +( + .param .b32 _Z13__ballot_syncji_param_0, + .param .b32 _Z13__ballot_syncji_param_1 +) +; +.func (.param .b32 func_retval0) _Z16__shfl_down_syncjiji +( + .param .b32 _Z16__shfl_down_syncjiji_param_0, + .param .b32 _Z16__shfl_down_syncjiji_param_1, + .param .b32 _Z16__shfl_down_syncjiji_param_2, + .param .b32 _Z16__shfl_down_syncjiji_param_3 +) +; +.func (.param .b64 func_retval0) __ullAtomicAdd +( + .param .b64 __ullAtomicAdd_param_0, + .param .b64 __ullAtomicAdd_param_1 +) +; +.global .align 1 .b8 $str[42] = {119, 97, 114, 112, 32, 37, 100, 32, 
122, 111, 109, 98, 105, 101, 32, 99, 111, 117, 110, 116, 32, 61, 32, 37, 100, 44, 32, 110, 122, 111, 109, 98, 105, 101, 115, 32, 61, 32, 37, 100, 10, 0}; +.global .align 1 .b8 $str$1[17] = {32, 67, 122, 111, 109, 98, 105, 101, 32, 61, 32, 37, 108, 108, 100, 10, 0}; + +.weak .entry _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i( + .param .u64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_0, + .param .u64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_1, + .param .u64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_2, + .param .u64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_3, + .param .u64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_4, + .param .u64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_5, + .param .u64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_6, + .param .u32 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_7 +) +{ + .local .align 8 .b8 __local_depot0[160]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .pred %p<81>; + .reg .b16 %rs<2>; + .reg .b32 %r<141>; + .reg .b64 %rd<224>; + .loc 10 76 0 +$L__func_begin0: + .loc 10 76 0 + + + mov.u64 %SPL, __local_depot0; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd31, [_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_0]; + ld.param.u64 %rd32, [_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_1]; + ld.param.u64 %rd33, [_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_2]; + ld.param.u64 %rd34, [_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_3]; + ld.param.u64 %rd35, [_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_4]; + ld.param.u64 %rd36, [_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_5]; + ld.param.u64 %rd37, [_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_6]; + ld.param.u32 %r67, 
[_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_7]; + mov.b64 %rd38, %rd33; + mov.b32 %r68, %r67; +$L__tmp0: + .loc 10 89 5 + mov.u16 %rs1, 1; + st.u8 [%rd34+203], %rs1; + .loc 10 90 13 + ld.u64 %rd1, [%rd36+104]; +$L__tmp1: + .loc 10 91 13 + ld.u64 %rd2, [%rd37+104]; +$L__tmp2: + .loc 10 92 13 + ld.u64 %rd3, [%rd34+104]; +$L__tmp3: + .loc 10 93 17 + ld.u64 %rd39, [%rd34+96]; + mov.b64 %rd4, %rd39; +$L__tmp4: + .loc 10 94 17 + ld.u64 %rd40, [%rd35+96]; + mov.b64 %rd5, %rd40; +$L__tmp5: + .loc 10 95 17 + ld.u64 %rd41, [%rd36+96]; + mov.b64 %rd6, %rd41; +$L__tmp6: + .loc 10 96 17 + ld.u64 %rd42, [%rd37+96]; + mov.b64 %rd7, %rd42; +$L__tmp7: + .loc 10 97 17 + ld.u64 %rd43, [%rd36+88]; + mov.b64 %rd8, %rd43; +$L__tmp8: + .loc 10 98 17 + ld.u64 %rd44, [%rd37+88]; + mov.b64 %rd9, %rd44; +$L__tmp9: + .loc 10 102 12 + mov.u32 %r69, 0; + mov.b32 %r1, %r69; +$L__tmp10: + .loc 10 108 20 + mov.u32 %r70, %tid.x; + mov.u32 %r71, %ntid.x; + mov.u32 %r72, %ctaid.x; + mul.lo.s32 %r73, %r71, %r72; + add.s32 %r74, %r70, %r73; + mov.b32 %r75, %r74; +$L__tmp11: + .loc 10 109 13 + mov.u32 %r76, %tid.x; + mov.b32 %r2, %r76; +$L__tmp12: + .loc 10 111 11 + mov.u32 %r77, %ctaid.x; + mov.b32 %r78, %r77; +$L__tmp13: + .loc 10 114 18 + mov.u64 %rd45, 0; + mov.b64 %rd46, %rd45; +$L__tmp14: + .loc 10 115 18 + mov.b64 %rd47, %rd45; +$L__tmp15: + .loc 10 116 25 + mov.b64 %rd48, %rd45; +$L__tmp16: + .loc 10 0 25 + add.u64 %rd49, %SP, 88; + mov.b64 %rd50, %rd49; + st.u64 [%SP+80], %rd50; + .loc 3 682 5 + bra.uni $L__tmp17; +$L__tmp17: + .loc 3 609 5 + ld.u64 %rd51, [%SP+80]; + { // callseq 0, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd51; + call.uni + _ZN18cooperative_groups4__v117thread_group_baseILj4EEC2Ev, + ( + param0 + ); + } // callseq 0 +$L__tmp18: + .loc 3 682 5 + ld.u64 %rd52, [%SP+88]; + ld.u64 %rd53, [%SP+96]; +$L__tmp19: + .loc 10 118 65 + st.u64 [%SP+120], %rd53; + st.u64 [%SP+112], %rd52; + add.u64 %rd54, %SP, 112; + mov.b64 
%rd55, %rd54; + st.u64 [%SP+64], %rd55; + .loc 10 118 39 + bra.uni $L__tmp20; +$L__tmp20: + .loc 3 1684 5 + ld.u64 %rd56, [%SP+64]; + add.u64 %rd57, %SP, 72; + mov.b64 %rd58, %rd57; + st.u64 [%SP+48], %rd58; + mov.b64 %rd59, %rd56; + st.u64 [%SP+56], %rd59; + .loc 3 1684 5 + bra.uni $L__tmp21; +$L__tmp21: + .loc 3 1648 13 + ld.u64 %rd60, [%SP+48]; + ld.u64 %rd61, [%SP+56]; + { // callseq 1, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd60; + .param .b64 param1; + st.param.b64 [param1+0], %rd61; + call.uni + _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2_, + ( + param0, + param1 + ); + } // callseq 1 + add.u64 %rd62, %SP, 104; + mov.b64 %rd63, %rd62; + st.u64 [%SP+24], %rd63; +$L__tmp22: + .loc 10 118 39 + bra.uni $L__tmp23; +$L__tmp23: + .loc 3 1612 9 + ld.u64 %rd64, [%SP+24]; + add.u64 %rd65, %SP, 32; + mov.b64 %rd66, %rd65; + st.u64 [%SP+8], %rd66; + mov.b64 %rd67, %rd64; + st.u64 [%SP+16], %rd67; + .loc 3 1612 9 + bra.uni $L__tmp24; +$L__tmp24: + .loc 3 1630 9 + ld.u64 %rd68, [%SP+8]; + ld.u64 %rd69, [%SP+16]; + { // callseq 2, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd68; + .param .b64 param1; + st.param.b64 [param1+0], %rd69; + call.uni + _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE, + ( + param0, + param1 + ); + } // callseq 2 +$L__tmp25: + .loc 3 1612 9 + ld.u64 %rd70, [%SP+32]; + ld.u64 %rd71, [%SP+40]; +$L__tmp26: + .loc 10 118 39 + st.u64 [%SP+136], %rd71; + st.u64 [%SP+128], %rd70; + .loc 10 120 15 + mov.u32 %r79, %ntid.x; + mov.b32 %r3, %r79; +$L__tmp27: + .loc 10 125 10 + mov.u32 %r80, %ctaid.x; + cvt.u64.u32 %rd72, %r80; + add.s64 %rd10, %rd31, %rd72; +$L__tmp28: + .loc 10 125 5 + mov.u32 %r116, %r1; +$L__tmp29: + mov.u64 %rd219, %rd10; +$L__tmp30: + bra.uni $L__BB0_1; + +$L__BB0_1: + mov.u64 %rd11, %rd219; + mov.u32 %r4, %r116; +$L__tmp31: + setp.lt.s64 %p15, 
%rd11, %rd32; + not.pred %p16, %p15; + @%p16 bra $L__BB0_69; + bra.uni $L__BB0_2; + +$L__BB0_2: +$L__tmp32: + .loc 10 130 20 + shl.b64 %rd83, %rd11, 3; + add.s64 %rd84, %rd5, %rd83; + ld.u64 %rd85, [%rd84]; + mov.b64 %rd12, %rd85; +$L__tmp33: + .loc 10 131 20 + shl.b64 %rd86, %rd11, 3; + add.s64 %rd87, %rd4, %rd86; + ld.u64 %rd88, [%rd87]; + shr.s64 %rd89, %rd88, 4; +$L__tmp34: + .loc 10 133 25 + shl.b64 %rd90, %rd12, 3; + add.s64 %rd91, %rd8, %rd90; + ld.u64 %rd92, [%rd91]; + mov.b64 %rd13, %rd92; +$L__tmp35: + .loc 10 134 25 + shl.b64 %rd93, %rd12, 3; + add.s64 %rd94, %rd93, 8; + add.s64 %rd95, %rd8, %rd94; + ld.u64 %rd96, [%rd95]; + mov.b64 %rd14, %rd96; +$L__tmp36: + .loc 10 135 10 + sub.s64 %rd15, %rd14, %rd13; +$L__tmp37: + .loc 10 137 25 + shl.b64 %rd97, %rd89, 3; + add.s64 %rd98, %rd9, %rd97; + ld.u64 %rd99, [%rd98]; + mov.b64 %rd16, %rd99; +$L__tmp38: + .loc 10 138 25 + shl.b64 %rd100, %rd89, 3; + add.s64 %rd101, %rd100, 8; + add.s64 %rd102, %rd9, %rd101; + ld.u64 %rd103, [%rd102]; + mov.b64 %rd17, %rd103; +$L__tmp39: + .loc 10 139 10 + sub.s64 %rd18, %rd17, %rd16; +$L__tmp40: + .loc 10 144 10 + sub.s64 %rd104, %rd14, %rd13; + sub.s64 %rd105, %rd17, %rd16; + setp.lt.s64 %p21, %rd104, %rd105; + not.pred %p22, %p21; + @%p22 bra $L__BB0_4; + bra.uni $L__BB0_3; + +$L__BB0_3: + sub.s64 %rd19, %rd14, %rd13; + bra.uni $L__BB0_5; + +$L__BB0_4: + sub.s64 %rd20, %rd17, %rd16; + bra.uni $L__BB0_5; + +$L__BB0_5: +$L__tmp41: + .loc 10 151 17 + add.s64 %rd21, %rd15, %rd18; +$L__tmp42: + .loc 10 153 25 + cvt.s64.s32 %rd107, %r3; + add.s64 %rd108, %rd21, %rd107; + sub.s64 %rd109, %rd108, 1; + cvt.s64.s32 %rd110, %r3; + div.s64 %rd111, %rd109, %rd110; + cvt.u32.u64 %r5, %rd111; +$L__tmp43: + .loc 10 154 14 + mul.lo.s32 %r84, %r5, %r2; + cvt.s64.s32 %rd112, %r84; + setp.lt.s64 %p23, %rd112, %rd21; + not.pred %p24, %p23; + @%p24 bra $L__BB0_7; + bra.uni $L__BB0_6; + +$L__BB0_6: + mul.lo.s32 %r85, %r5, %r2; + cvt.s64.s32 %rd22, %r85; + mov.u64 %rd220, %rd22; + bra.uni 
$L__BB0_8; + +$L__BB0_7: + mov.u64 %rd220, %rd21; +$L__tmp44: + bra.uni $L__BB0_8; + +$L__BB0_8: + mov.u64 %rd23, %rd220; + cvt.u32.u64 %r6, %rd23; +$L__tmp45: + .loc 10 155 18 + add.s32 %r86, %r6, %r5; + cvt.s64.s32 %rd113, %r86; + setp.lt.s64 %p25, %rd113, %rd21; + not.pred %p26, %p25; + @%p26 bra $L__BB0_10; + bra.uni $L__BB0_9; + +$L__BB0_9: + add.s32 %r87, %r6, %r5; + cvt.s64.s32 %rd24, %r87; + mov.u64 %rd221, %rd24; + bra.uni $L__BB0_11; + +$L__BB0_10: + mov.u64 %rd221, %rd21; +$L__tmp46: + bra.uni $L__BB0_11; + +$L__BB0_11: + mov.u64 %rd25, %rd221; + cvt.u32.u64 %r7, %rd25; +$L__tmp47: + .loc 10 158 15 + cvt.s64.s32 %rd114, %r6; + sub.s64 %rd115, %rd114, %rd18; + cvt.u32.u64 %r88, %rd115; + setp.gt.s32 %p27, %r88, 0; + not.pred %p28, %p27; + @%p28 bra $L__BB0_13; + bra.uni $L__BB0_12; + +$L__BB0_12: + cvt.s64.s32 %rd116, %r6; + sub.s64 %rd117, %rd116, %rd18; + cvt.u32.u64 %r8, %rd117; + mov.u32 %r117, %r8; + bra.uni $L__BB0_14; + +$L__BB0_13: + .loc 10 0 15 + mov.u32 %r89, 0; + .loc 10 158 15 + mov.u32 %r117, %r89; + bra.uni $L__BB0_14; + +$L__BB0_14: + mov.u32 %r9, %r117; +$L__tmp48: + .loc 10 159 15 + cvt.s64.s32 %rd118, %r6; + setp.lt.s64 %p29, %rd118, %rd15; + not.pred %p30, %p29; + @%p30 bra $L__BB0_16; + bra.uni $L__BB0_15; + +$L__BB0_15: + cvt.s64.s32 %rd26, %r6; + mov.u64 %rd222, %rd26; + bra.uni $L__BB0_17; + +$L__BB0_16: + mov.u64 %rd222, %rd15; +$L__tmp49: + bra.uni $L__BB0_17; + +$L__BB0_17: + mov.u64 %rd27, %rd222; + cvt.u32.u64 %r10, %rd27; +$L__tmp50: + .loc 10 162 5 + mov.u32 %r118, %r9; +$L__tmp51: + mov.u32 %r119, %r10; +$L__tmp52: + bra.uni $L__BB0_18; + +$L__BB0_18: + mov.u32 %r12, %r119; + mov.u32 %r11, %r118; +$L__tmp53: + setp.lt.s32 %p31, %r11, %r12; + not.pred %p32, %p31; + @%p32 bra $L__BB0_23; + bra.uni $L__BB0_19; + +$L__BB0_19: +$L__tmp54: + .loc 10 163 17 + add.s32 %r113, %r11, %r12; + div.s32 %r13, %r113, 2; +$L__tmp55: + .loc 10 164 7 + cvt.s64.s32 %rd209, %r13; + add.s64 %rd210, %rd209, %rd13; + shl.b64 %rd211, %rd210, 3; + 
add.s64 %rd212, %rd6, %rd211; + ld.u64 %rd213, [%rd212]; + sub.s32 %r114, %r6, %r13; + sub.s32 %r115, %r114, 1; + cvt.s64.s32 %rd214, %r115; + add.s64 %rd215, %rd214, %rd16; + shl.b64 %rd216, %rd215, 3; + add.s64 %rd217, %rd7, %rd216; + ld.u64 %rd218, [%rd217]; + setp.lt.s64 %p72, %rd213, %rd218; + not.pred %p73, %p72; + @%p73 bra $L__BB0_21; + bra.uni $L__BB0_20; + +$L__BB0_20: +$L__tmp56: + .loc 10 165 10 + add.s32 %r14, %r13, 1; +$L__tmp57: + mov.u32 %r120, %r14; +$L__tmp58: + mov.u32 %r121, %r12; +$L__tmp59: + bra.uni $L__BB0_22; +$L__tmp60: + +$L__BB0_21: + .loc 10 168 10 + mov.b32 %r15, %r13; +$L__tmp61: + mov.u32 %r120, %r11; +$L__tmp62: + mov.u32 %r121, %r15; +$L__tmp63: + bra.uni $L__BB0_22; + +$L__BB0_22: + mov.u32 %r17, %r121; + mov.u32 %r16, %r120; +$L__tmp64: + mov.u32 %r118, %r16; +$L__tmp65: + mov.u32 %r119, %r17; +$L__tmp66: + bra.uni $L__BB0_18; +$L__tmp67: + +$L__BB0_23: + .loc 10 171 16 + mov.b32 %r18, %r11; +$L__tmp68: + .loc 10 172 16 + sub.s32 %r90, %r6, %r11; + sub.s32 %r19, %r90, 1; +$L__tmp69: + .loc 10 173 5 + setp.gt.s32 %p34, %r6, 0; + mov.pred %p33, 0; + not.pred %p35, %p34; + mov.pred %p74, %p33; + @%p35 bra $L__BB0_25; + bra.uni $L__BB0_24; + +$L__BB0_24: + cvt.s64.s32 %rd119, %r6; + add.s64 %rd120, %rd15, %rd18; + setp.lt.s64 %p1, %rd119, %rd120; + mov.pred %p74, %p1; + bra.uni $L__BB0_25; + +$L__BB0_25: + mov.pred %p2, %p74; + mov.pred %p36, 0; + not.pred %p37, %p2; + mov.pred %p75, %p36; + @%p37 bra $L__BB0_27; + bra.uni $L__BB0_26; + +$L__BB0_26: + cvt.s64.s32 %rd121, %r18; + add.s64 %rd122, %rd121, %rd13; + shl.b64 %rd123, %rd122, 3; + add.s64 %rd124, %rd6, %rd123; + ld.u64 %rd125, [%rd124]; + cvt.s64.s32 %rd126, %r19; + add.s64 %rd127, %rd126, %rd16; + shl.b64 %rd128, %rd127, 3; + add.s64 %rd129, %rd7, %rd128; + ld.u64 %rd130, [%rd129]; + setp.eq.s64 %p3, %rd125, %rd130; + mov.pred %p75, %p3; + bra.uni $L__BB0_27; + +$L__BB0_27: + mov.pred %p4, %p75; + not.pred %p38, %p4; + mov.u32 %r122, %r6; +$L__tmp70: + @%p38 bra $L__BB0_29; 
+ bra.uni $L__BB0_28; + +$L__BB0_28: +$L__tmp71: + .loc 10 174 8 + add.s32 %r20, %r6, -1; +$L__tmp72: + mov.u32 %r122, %r20; +$L__tmp73: + bra.uni $L__BB0_29; +$L__tmp74: + +$L__BB0_29: + .loc 10 177 18 + mov.u32 %r21, %r122; +$L__tmp75: + cvt.s64.s32 %rd131, %r18; + add.s64 %rd132, %rd131, %rd13; + cvt.u32.u64 %r22, %rd132; +$L__tmp76: + .loc 10 178 18 + sub.s32 %r91, %r21, %r18; + cvt.s64.s32 %rd133, %r91; + add.s64 %rd134, %rd133, %rd16; + cvt.u32.u64 %r23, %rd134; +$L__tmp77: + .loc 10 183 5 + cvt.s64.s32 %rd135, %r7; + sub.s64 %rd136, %rd135, %rd18; + cvt.u32.u64 %r92, %rd136; + setp.gt.s32 %p39, %r92, 0; + not.pred %p40, %p39; + @%p40 bra $L__BB0_31; + bra.uni $L__BB0_30; + +$L__BB0_30: + cvt.s64.s32 %rd137, %r7; + sub.s64 %rd138, %rd137, %rd18; + cvt.u32.u64 %r24, %rd138; + mov.u32 %r123, %r24; + bra.uni $L__BB0_32; + +$L__BB0_31: + .loc 10 0 5 + mov.u32 %r93, 0; + .loc 10 183 5 + mov.u32 %r123, %r93; + bra.uni $L__BB0_32; + +$L__BB0_32: + mov.u32 %r25, %r123; +$L__tmp78: + .loc 10 184 5 + cvt.s64.s32 %rd139, %r7; + setp.lt.s64 %p41, %rd139, %rd15; + not.pred %p42, %p41; + @%p42 bra $L__BB0_34; + bra.uni $L__BB0_33; + +$L__BB0_33: + cvt.s64.s32 %rd28, %r7; + mov.u64 %rd223, %rd28; + bra.uni $L__BB0_35; + +$L__BB0_34: + mov.u64 %rd223, %rd15; +$L__tmp79: + bra.uni $L__BB0_35; + +$L__BB0_35: + mov.u64 %rd29, %rd223; + cvt.u32.u64 %r26, %rd29; +$L__tmp80: + .loc 10 186 5 + mov.u32 %r124, %r25; +$L__tmp81: + mov.u32 %r125, %r26; +$L__tmp82: + bra.uni $L__BB0_36; + +$L__BB0_36: + mov.u32 %r28, %r125; + mov.u32 %r27, %r124; +$L__tmp83: + setp.lt.s32 %p43, %r27, %r28; + not.pred %p44, %p43; + @%p44 bra $L__BB0_41; + bra.uni $L__BB0_37; + +$L__BB0_37: +$L__tmp84: + .loc 10 187 18 + add.s32 %r110, %r27, %r28; + div.s32 %r29, %r110, 2; +$L__tmp85: + .loc 10 189 8 + cvt.s64.s32 %rd199, %r29; + add.s64 %rd200, %rd199, %rd13; + shl.b64 %rd201, %rd200, 3; + add.s64 %rd202, %rd6, %rd201; + ld.u64 %rd203, [%rd202]; + sub.s32 %r111, %r7, %r29; + sub.s32 %r112, %r111, 1; + 
cvt.s64.s32 %rd204, %r112; + add.s64 %rd205, %rd204, %rd16; + shl.b64 %rd206, %rd205, 3; + add.s64 %rd207, %rd7, %rd206; + ld.u64 %rd208, [%rd207]; + setp.lt.s64 %p70, %rd203, %rd208; + not.pred %p71, %p70; + @%p71 bra $L__BB0_39; + bra.uni $L__BB0_38; + +$L__BB0_38: +$L__tmp86: + .loc 10 190 11 + add.s32 %r30, %r29, 1; +$L__tmp87: + mov.u32 %r126, %r30; +$L__tmp88: + mov.u32 %r127, %r28; +$L__tmp89: + bra.uni $L__BB0_40; +$L__tmp90: + +$L__BB0_39: + .loc 10 193 11 + mov.b32 %r31, %r29; +$L__tmp91: + mov.u32 %r126, %r27; +$L__tmp92: + mov.u32 %r127, %r31; +$L__tmp93: + bra.uni $L__BB0_40; + +$L__BB0_40: + mov.u32 %r33, %r127; + mov.u32 %r32, %r126; +$L__tmp94: + mov.u32 %r124, %r32; +$L__tmp95: + mov.u32 %r125, %r33; +$L__tmp96: + bra.uni $L__BB0_36; +$L__tmp97: + +$L__BB0_41: + .loc 10 197 5 + mov.b32 %r34, %r27; +$L__tmp98: + .loc 10 198 5 + sub.s32 %r94, %r7, %r27; + sub.s32 %r35, %r94, 1; +$L__tmp99: + .loc 10 199 5 + cvt.s64.s32 %rd140, %r7; + add.s64 %rd141, %rd15, %rd18; + setp.lt.s64 %p46, %rd140, %rd141; + mov.pred %p45, 0; + not.pred %p47, %p46; + mov.pred %p76, %p45; + @%p47 bra $L__BB0_43; + bra.uni $L__BB0_42; + +$L__BB0_42: + cvt.s64.s32 %rd142, %r34; + add.s64 %rd143, %rd142, %rd13; + shl.b64 %rd144, %rd143, 3; + add.s64 %rd145, %rd6, %rd144; + ld.u64 %rd146, [%rd145]; + cvt.s64.s32 %rd147, %r35; + add.s64 %rd148, %rd147, %rd16; + shl.b64 %rd149, %rd148, 3; + add.s64 %rd150, %rd7, %rd149; + ld.u64 %rd151, [%rd150]; + setp.eq.s64 %p5, %rd146, %rd151; + mov.pred %p76, %p5; + bra.uni $L__BB0_43; + +$L__BB0_43: + mov.pred %p6, %p76; + not.pred %p48, %p6; + @%p48 bra $L__BB0_45; + bra.uni $L__BB0_44; + +$L__BB0_44: +$L__tmp100: + .loc 10 200 9 + bra.uni $L__BB0_45; +$L__tmp101: + +$L__BB0_45: + .loc 10 203 16 + cvt.s64.s32 %rd152, %r34; + add.s64 %rd153, %rd152, %rd13; + cvt.u32.u64 %r36, %rd153; +$L__tmp102: + .loc 10 204 16 + sub.s32 %r95, %r7, %r34; + cvt.s64.s32 %rd154, %r95; + add.s64 %rd155, %rd154, %rd16; + cvt.u32.u64 %r37, %rd155; +$L__tmp103: + 
.loc 10 208 13 + mov.u32 %r96, 0; + mov.b32 %r38, %r96; +$L__tmp104: + .loc 10 213 21 + mov.b32 %r39, %r96; +$L__tmp105: + .loc 10 217 11 + mov.b32 %r40, %r22; +$L__tmp106: + .loc 10 218 11 + mov.b32 %r41, %r23; +$L__tmp107: + .loc 10 224 5 + mov.u32 %r128, %r38; +$L__tmp108: + mov.u32 %r129, %r39; +$L__tmp109: + mov.u32 %r130, %r40; +$L__tmp110: + mov.u32 %r131, %r41; +$L__tmp111: + bra.uni $L__BB0_46; + +$L__BB0_46: + mov.u32 %r45, %r131; + mov.u32 %r44, %r130; + mov.u32 %r43, %r129; + mov.u32 %r42, %r128; +$L__tmp112: + setp.lt.s32 %p50, %r44, %r36; + mov.pred %p49, 0; + not.pred %p51, %p50; + mov.pred %p77, %p49; + @%p51 bra $L__BB0_48; + bra.uni $L__BB0_47; + +$L__BB0_47: + setp.lt.s32 %p7, %r45, %r37; + mov.pred %p77, %p7; + bra.uni $L__BB0_48; + +$L__BB0_48: + mov.pred %p8, %p77; + mov.pred %p52, 0; + not.pred %p53, %p8; + mov.pred %p78, %p52; + @%p53 bra $L__BB0_50; + bra.uni $L__BB0_49; + +$L__BB0_49: + setp.ne.s64 %p9, %rd15, 0; + mov.pred %p78, %p9; + bra.uni $L__BB0_50; + +$L__BB0_50: + mov.pred %p10, %p78; + mov.pred %p54, 0; + not.pred %p55, %p10; + mov.pred %p79, %p54; + @%p55 bra $L__BB0_52; + bra.uni $L__BB0_51; + +$L__BB0_51: + setp.ne.s64 %p11, %rd18, 0; + mov.pred %p79, %p11; + bra.uni $L__BB0_52; + +$L__BB0_52: + mov.pred %p12, %p79; + not.pred %p56, %p12; + @%p56 bra $L__BB0_60; + bra.uni $L__BB0_53; + +$L__BB0_53: +$L__tmp113: + .loc 10 226 9 + cvt.s64.s32 %rd169, %r44; + shl.b64 %rd170, %rd169, 3; + add.s64 %rd171, %rd6, %rd170; + ld.u64 %rd172, [%rd171]; + cvt.s64.s32 %rd173, %r45; + shl.b64 %rd174, %rd173, 3; + add.s64 %rd175, %rd7, %rd174; + ld.u64 %rd176, [%rd175]; + setp.eq.s64 %p64, %rd172, %rd176; + not.pred %p65, %p64; + @%p65 bra $L__BB0_58; + bra.uni $L__BB0_54; + +$L__BB0_54: +$L__tmp114: + .loc 10 228 13 + cvt.s64.s32 %rd193, %r44; + shl.b64 %rd194, %rd193, 2; + add.s64 %rd195, %rd1, %rd194; + ld.u32 %r106, [%rd195]; + mov.b32 %r46, %r106; +$L__tmp115: + .loc 10 229 13 + cvt.s64.s32 %rd196, %r45; + shl.b64 %rd197, %rd196, 2; + 
add.s64 %rd198, %rd2, %rd197; + ld.u32 %r107, [%rd198]; + mov.b32 %r47, %r107; +$L__tmp116: + .loc 10 230 13 + setp.ne.s32 %p68, %r43, 0; + not.pred %p69, %p68; + @%p69 bra $L__BB0_56; + bra.uni $L__BB0_55; + +$L__BB0_55: +$L__tmp117: + .loc 10 232 23 + mul.lo.s32 %r109, %r46, %r47; +$L__tmp118: + .loc 10 233 17 + add.s32 %r48, %r42, %r109; +$L__tmp119: + mov.u32 %r132, %r48; +$L__tmp120: + mov.u32 %r133, %r43; +$L__tmp121: + bra.uni $L__BB0_57; +$L__tmp122: + +$L__BB0_56: + .loc 10 239 17 + mov.u32 %r108, 1; + mov.b32 %r49, %r108; +$L__tmp123: + .loc 10 240 17 + mul.lo.s32 %r50, %r46, %r47; +$L__tmp124: + mov.u32 %r132, %r50; +$L__tmp125: + mov.u32 %r133, %r49; +$L__tmp126: + bra.uni $L__BB0_57; +$L__tmp127: + +$L__BB0_57: + .loc 10 245 13 + mov.u32 %r52, %r133; + mov.u32 %r51, %r132; +$L__tmp128: + add.s32 %r53, %r44, 1; +$L__tmp129: + .loc 10 246 13 + add.s32 %r54, %r45, 1; +$L__tmp130: + mov.u32 %r134, %r51; +$L__tmp131: + mov.u32 %r135, %r52; +$L__tmp132: + mov.u32 %r136, %r53; +$L__tmp133: + mov.u32 %r137, %r54; +$L__tmp134: + bra.uni $L__BB0_59; +$L__tmp135: + +$L__BB0_58: + .loc 10 252 13 + cvt.s64.s32 %rd177, %r44; + shl.b64 %rd178, %rd177, 3; + add.s64 %rd179, %rd6, %rd178; + ld.u64 %rd180, [%rd179]; + cvt.s64.s32 %rd181, %r45; + shl.b64 %rd182, %rd181, 3; + add.s64 %rd183, %rd7, %rd182; + ld.u64 %rd184, [%rd183]; + setp.lt.s64 %p66, %rd180, %rd184; + selp.u32 %r104, 1, 0, %p66; + add.s32 %r55, %r44, %r104; +$L__tmp136: + .loc 10 253 13 + cvt.s64.s32 %rd185, %r55; + shl.b64 %rd186, %rd185, 3; + add.s64 %rd187, %rd6, %rd186; + ld.u64 %rd188, [%rd187]; + cvt.s64.s32 %rd189, %r45; + shl.b64 %rd190, %rd189, 3; + add.s64 %rd191, %rd7, %rd190; + ld.u64 %rd192, [%rd191]; + setp.gt.s64 %p67, %rd188, %rd192; + selp.u32 %r105, 1, 0, %p67; + add.s32 %r56, %r45, %r105; +$L__tmp137: + mov.u32 %r134, %r42; +$L__tmp138: + mov.u32 %r135, %r43; +$L__tmp139: + mov.u32 %r136, %r55; +$L__tmp140: + mov.u32 %r137, %r56; +$L__tmp141: + bra.uni $L__BB0_59; + +$L__BB0_59: + 
mov.u32 %r60, %r137; + mov.u32 %r59, %r136; + mov.u32 %r58, %r135; + mov.u32 %r57, %r134; +$L__tmp142: + mov.u32 %r128, %r57; +$L__tmp143: + mov.u32 %r129, %r58; +$L__tmp144: + mov.u32 %r130, %r59; +$L__tmp145: + mov.u32 %r131, %r60; +$L__tmp146: + bra.uni $L__BB0_46; +$L__tmp147: + +$L__BB0_60: + .loc 10 0 13 + add.u64 %rd156, %SP, 128; + mov.b64 %rd157, %rd156; + st.u64 [%SP+0], %rd157; + mov.b32 %r97, %r43; +$L__tmp148: + .loc 3 1081 50 + bra.uni $L__tmp149; +$L__tmp149: + .loc 3 1012 27 + mov.u32 %r98, -1; + mov.b32 %r99, %r98; +$L__tmp150: + .loc 3 1019 9 + mov.b32 %r100, %r99; +$L__tmp151: + .loc 3 1081 36 + { // callseq 6, 0 + .reg .b32 temp_param_reg; + .param .b32 param0; + st.param.b32 [param0+0], %r100; + .param .b32 param1; + st.param.b32 [param1+0], %r97; + .param .b32 retval0; + call.uni (retval0), + _Z13__ballot_syncji, + ( + param0, + param1 + ); + ld.param.b32 %r101, [retval0+0]; +$L__tmp152: + } // callseq 6 + .loc 3 1082 9 + setp.ne.s32 %p57, %r101, 0; + selp.u32 %r102, 1, 0, %p57; +$L__tmp153: + .loc 10 270 19 + mov.b32 %r61, %r102; +$L__tmp154: + .loc 10 273 5 + setp.ne.s32 %p58, %r61, 0; + not.pred %p59, %p58; + mov.u32 %r138, %r42; +$L__tmp155: + @%p59 bra $L__BB0_62; + bra.uni $L__BB0_61; + +$L__BB0_61: +$L__tmp156: + .loc 10 275 8 + ld.u64 %rd158, [%SP+136]; + ld.u64 %rd159, [%SP+128]; + .loc 10 275 14 + { // callseq 7, 0 + .reg .b32 temp_param_reg; + .param .align 8 .b8 param0[16]; + st.param.b64 [param0+0], %rd159; + st.param.b64 [param0+8], %rd158; + .param .b32 param1; + st.param.b32 [param1+0], %r42; + .param .b32 retval0; + call.uni (retval0), + _Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0_, + ( + param0, + param1 + ); + ld.param.b32 %r62, [retval0+0]; +$L__tmp157: + } // callseq 7 + mov.u32 %r138, %r62; +$L__tmp158: + bra.uni $L__BB0_62; +$L__tmp159: + +$L__BB0_62: + .loc 10 284 5 + mov.u32 %r63, %r138; +$L__tmp160: + setp.eq.s32 %p60, %r2, 0; + not.pred %p61, %p60; + mov.u32 %r140, %r4; 
+$L__tmp161: + @%p61 bra $L__BB0_67; + bra.uni $L__BB0_63; + +$L__BB0_63: +$L__tmp162: + .loc 10 287 9 + setp.ne.s32 %p62, %r61, 0; + not.pred %p63, %p62; + @%p63 bra $L__BB0_65; + bra.uni $L__BB0_64; + +$L__BB0_64: +$L__tmp163: + .loc 10 295 12 + shl.b64 %rd164, %rd11, 2; + add.s64 %rd165, %rd3, %rd164; + st.u32 [%rd165], %r63; + .loc 10 296 12 + shl.b64 %rd166, %rd11, 3; + add.s64 %rd167, %rd4, %rd166; + st.u64 [%rd167], %rd12; + mov.u32 %r139, %r4; +$L__tmp164: + bra.uni $L__BB0_66; +$L__tmp165: + +$L__BB0_65: + .loc 10 301 12 + add.s32 %r64, %r4, 1; +$L__tmp166: + .loc 10 302 12 + neg.s64 %rd160, %rd12; + sub.s64 %rd161, %rd160, 2; + shl.b64 %rd162, %rd11, 3; + add.s64 %rd163, %rd4, %rd162; + st.u64 [%rd163], %rd161; + mov.u32 %r139, %r64; +$L__tmp167: + bra.uni $L__BB0_66; + +$L__BB0_66: + mov.u32 %r65, %r139; +$L__tmp168: + mov.u32 %r140, %r65; +$L__tmp169: + bra.uni $L__BB0_67; +$L__tmp170: + +$L__BB0_67: + .loc 10 127 10 + mov.u32 %r66, %r140; +$L__tmp171: + bra.uni $L__BB0_68; + +$L__BB0_68: + mov.u32 %r103, %nctaid.x; + cvt.u64.u32 %rd168, %r103; + add.s64 %rd30, %rd11, %rd168; +$L__tmp172: + mov.u32 %r116, %r66; +$L__tmp173: + mov.u64 %rd219, %rd30; +$L__tmp174: + bra.uni $L__BB0_1; +$L__tmp175: + +$L__BB0_69: + .loc 10 310 3 + setp.eq.s32 %p18, %r2, 0; + mov.pred %p17, 0; + not.pred %p19, %p18; + mov.pred %p80, %p17; + @%p19 bra $L__BB0_71; + bra.uni $L__BB0_70; + +$L__BB0_70: + setp.gt.s32 %p13, %r4, 0; + mov.pred %p80, %p13; + bra.uni $L__BB0_71; + +$L__BB0_71: + mov.pred %p14, %p80; + not.pred %p20, %p14; + @%p20 bra $L__BB0_73; + bra.uni $L__BB0_72; + +$L__BB0_72: +$L__tmp176: + .loc 10 312 7 + mov.u32 %r81, %ctaid.x; + ld.u64 %rd73, [%rd34+176]; + st.u32 [%SP+144], %r81; + st.u32 [%SP+148], %r4; + st.u64 [%SP+152], %rd73; + mov.u64 %rd74, $str; + cvta.global.u64 %rd75, %rd74; + add.u64 %rd76, %SP, 144; + { // callseq 3, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd75; + .param .b64 param1; + st.param.b64 
[param1+0], %rd76; + .param .b32 retval0; + call.uni (retval0), + vprintf, + ( + param0, + param1 + ); + ld.param.b32 %r82, [retval0+0]; + } // callseq 3 + .loc 10 313 7 + add.s64 %rd77, %rd34, 176; + cvt.s64.s32 %rd78, %r4; + { // callseq 4, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd77; + .param .b64 param1; + st.param.b64 [param1+0], %rd78; + .param .b64 retval0; + call.uni (retval0), + _Z9atomicAddPyy, + ( + param0, + param1 + ); + ld.param.b64 %rd79, [retval0+0]; + } // callseq 4 + .loc 10 314 7 + ld.u64 %rd80, [%rd34+176]; + st.u64 [%SP+144], %rd80; + mov.u64 %rd81, $str$1; + cvta.global.u64 %rd82, %rd81; + { // callseq 5, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd82; + .param .b64 param1; + st.param.b64 [param1+0], %rd76; + .param .b32 retval0; + call.uni (retval0), + vprintf, + ( + param0, + param1 + ); + ld.param.b32 %r83, [retval0+0]; + } // callseq 5 + bra.uni $L__BB0_73; +$L__tmp177: + +$L__BB0_73: + .loc 10 319 1 + ret; +$L__tmp178: +$L__func_end0: + +} + // .weak _Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0_ +.weak .func (.param .b32 func_retval0) _Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0_( + .param .align 8 .b8 _Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0__param_0[16], + .param .b32 _Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0__param_1 +) +{ + .local .align 8 .b8 __local_depot1[32]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .pred %p<3>; + .reg .b32 %r<24>; + .reg .b64 %rd<9>; + .loc 10 48 0 +$L__func_begin1: + .loc 10 48 0 + + + mov.u64 %SPL, __local_depot1; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd2, [_Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0__param_0+8]; + ld.param.u32 %r4, [_Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0__param_1]; 
+ ld.param.u64 %rd1, [_Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0__param_0]; + st.u64 [%SP+16], %rd2; + st.u64 [%SP+8], %rd1; + st.u32 [%SP+24], %r4; +$L__tmp179: + .loc 10 52 16 + mov.u32 %r5, 16; + mov.b32 %r1, %r5; +$L__tmp180: + .loc 10 52 5 + mov.u32 %r23, %r1; +$L__tmp181: + bra.uni $L__BB1_1; + +$L__BB1_1: + mov.u32 %r2, %r23; +$L__tmp182: + setp.gt.s32 %p1, %r2, 0; + not.pred %p2, %p1; + @%p2 bra $L__BB1_4; + bra.uni $L__BB1_2; + +$L__BB1_2: + .loc 10 0 5 + add.u64 %rd3, %SP, 8; + mov.b64 %rd4, %rd3; + st.u64 [%SP+0], %rd4; + add.u64 %rd5, %SP, 24; + mov.b64 %rd6, %rd5; +$L__tmp183: + mov.b32 %r8, %r2; +$L__tmp184: + mov.b64 %rd7, %rd6; +$L__tmp185: + .loc 3 1040 13 + bra.uni $L__tmp186; +$L__tmp186: + .loc 4 232 5 + mov.b64 %rd8, %rd7; +$L__tmp187: + .loc 3 1040 13 + ld.u32 %r9, [%rd8]; + .loc 3 1040 55 + bra.uni $L__tmp188; +$L__tmp188: + .loc 3 1012 27 + mov.u32 %r10, -1; + mov.b32 %r11, %r10; +$L__tmp189: + .loc 3 1019 9 + mov.b32 %r12, %r11; + mov.b32 %r13, %r9; +$L__tmp190: + .loc 3 0 9 + mov.b32 %r14, %r12; +$L__tmp191: + mov.b32 %r15, %r8; +$L__tmp192: + mov.u32 %r16, 32; + mov.b32 %r17, %r16; +$L__tmp193: + .loc 3 1039 16 + bra.uni $L__tmp194; +$L__tmp194: + .loc 5 327 44 + { // callseq 8, 0 + .reg .b32 temp_param_reg; + .param .b32 param0; + st.param.b32 [param0+0], %r14; + .param .b32 param1; + st.param.b32 [param1+0], %r13; + .param .b32 param2; + st.param.b32 [param2+0], %r15; + .param .b32 param3; + st.param.b32 [param3+0], %r17; + .param .b32 retval0; + call.uni (retval0), + _Z16__shfl_down_syncjiji, + ( + param0, + param1, + param2, + param3 + ); + ld.param.b32 %r18, [retval0+0]; + } // callseq 8 +$L__tmp195: + .loc 3 1039 16 + mov.b32 %r19, %r18; +$L__tmp196: + .loc 10 54 18 + mov.b32 %r20, %r19; +$L__tmp197: + .loc 10 55 9 + ld.u32 %r21, [%SP+24]; + add.s32 %r22, %r21, %r20; + st.u32 [%SP+24], %r22; +$L__tmp198: + .loc 10 52 39 + bra.uni $L__BB1_3; + +$L__BB1_3: + div.s32 %r3, %r2, 2; +$L__tmp199: + 
mov.u32 %r23, %r3; +$L__tmp200: + bra.uni $L__BB1_1; +$L__tmp201: + +$L__BB1_4: + .loc 10 57 5 + ld.u32 %r6, [%SP+24]; + mov.b32 %r7, %r6; + st.param.b32 [func_retval0+0], %r7; + ret; +$L__tmp202: +$L__func_end1: + +} + // .weak _ZN4dim3C1E5uint3 +.weak .func _ZN4dim3C1E5uint3( + .param .b64 _ZN4dim3C1E5uint3_param_0, + .param .align 4 .b8 _ZN4dim3C1E5uint3_param_1[12] +) +{ + .local .align 4 .b8 __local_depot2[12]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b32 %r<7>; + .reg .b64 %rd<2>; + .loc 7 979 0 +$L__func_begin2: + .loc 7 979 0 + + + mov.u64 %SPL, __local_depot2; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN4dim3C1E5uint3_param_0]; + ld.param.u32 %r2, [_ZN4dim3C1E5uint3_param_1+4]; + ld.param.u32 %r3, [_ZN4dim3C1E5uint3_param_1+8]; + ld.param.u32 %r1, [_ZN4dim3C1E5uint3_param_1]; + st.u32 [%SP+8], %r3; + st.u32 [%SP+4], %r2; + st.u32 [%SP+0], %r1; +$L__tmp203: + .loc 7 979 47 + ld.u32 %r4, [%SP+0]; + st.u32 [%rd1], %r4; + .loc 7 979 55 + ld.u32 %r5, [%SP+4]; + st.u32 [%rd1+4], %r5; + .loc 7 979 63 + ld.u32 %r6, [%SP+8]; + st.u32 [%rd1+8], %r6; + .loc 7 979 71 + ret; +$L__tmp204: +$L__func_end2: + +} + // .weak _ZN18cooperative_groups4__v112thread_groupC2Ej +.weak .func _ZN18cooperative_groups4__v112thread_groupC2Ej( + .param .b64 _ZN18cooperative_groups4__v112thread_groupC2Ej_param_0, + .param .b32 _ZN18cooperative_groups4__v112thread_groupC2Ej_param_1 +) +{ + .local .align 8 .b8 __local_depot3[8]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b32 %r<10>; + .reg .b64 %rd<5>; + + + mov.u64 %SPL, __local_depot3; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN18cooperative_groups4__v112thread_groupC2Ej_param_0]; + ld.param.u32 %r1, [_ZN18cooperative_groups4__v112thread_groupC2Ej_param_1]; + mov.b64 %rd2, %rd1; + st.u64 [%SP+0], %rd2; + mov.b32 %r2, %r1; + ld.u64 %rd3, [%SP+0]; + and.b32 %r3, %r2, 127; + ld.u32 %r4, [%rd3]; + and.b32 %r5, %r4, -255; + shl.b32 %r6, %r3, 1; + or.b32 %r7, %r5, %r6; + st.u32 [%rd3], %r7; + ld.u64 %rd4, [%SP+0]; + ld.u32 
%r8, [%rd4]; + and.b32 %r9, %r8, -2; + st.u32 [%rd4], %r9; + ret; + +} + // .weak _ZN18cooperative_groups4__v117thread_group_baseILj4EEC2Ev +.weak .func _ZN18cooperative_groups4__v117thread_group_baseILj4EEC2Ev( + .param .b64 _ZN18cooperative_groups4__v117thread_group_baseILj4EEC2Ev_param_0 +) +{ + .local .align 8 .b8 __local_depot4[8]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b32 %r<2>; + .reg .b64 %rd<4>; + + + mov.u64 %SPL, __local_depot4; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN18cooperative_groups4__v117thread_group_baseILj4EEC2Ev_param_0]; + mov.b64 %rd2, %rd1; + st.u64 [%SP+0], %rd2; + ld.u64 %rd3, [%SP+0]; + mov.u32 %r1, 4; + { // callseq 9, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd3; + .param .b32 param1; + st.param.b32 [param1+0], %r1; + call.uni + _ZN18cooperative_groups4__v112thread_groupC2Ej, + ( + param0, + param1 + ); + } // callseq 9 + ret; + +} + // .weak _ZN18cooperative_groups4__v117thread_group_baseILj1EEC2Ev +.weak .func _ZN18cooperative_groups4__v117thread_group_baseILj1EEC2Ev( + .param .b64 _ZN18cooperative_groups4__v117thread_group_baseILj1EEC2Ev_param_0 +) +{ + .local .align 8 .b8 __local_depot5[8]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b32 %r<2>; + .reg .b64 %rd<4>; + + + mov.u64 %SPL, __local_depot5; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN18cooperative_groups4__v117thread_group_baseILj1EEC2Ev_param_0]; + mov.b64 %rd2, %rd1; + st.u64 [%SP+0], %rd2; + ld.u64 %rd3, [%SP+0]; + mov.u32 %r1, 1; + { // callseq 10, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd3; + .param .b32 param1; + st.param.b32 [param1+0], %r1; + call.uni + _ZN18cooperative_groups4__v112thread_groupC2Ej, + ( + param0, + param1 + ); + } // callseq 10 + ret; + +} + // .weak _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj +.weak .func _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj( + .param .b64 
_ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj_param_0, + .param .b32 _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj_param_1, + .param .b32 _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj_param_2 +) +{ + .local .align 8 .b8 __local_depot6[8]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b32 %r<23>; + .reg .b64 %rd<9>; + + + mov.u64 %SPL, __local_depot6; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj_param_0]; + ld.param.u32 %r1, [_ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj_param_1]; + ld.param.u32 %r2, [_ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj_param_2]; + mov.b64 %rd2, %rd1; + st.u64 [%SP+0], %rd2; + mov.b32 %r3, %r1; + mov.b32 %r4, %r2; + ld.u64 %rd3, [%SP+0]; + { // callseq 11, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd3; + call.uni + _ZN18cooperative_groups4__v117thread_group_baseILj1EEC2Ev, + ( + param0 + ); + } // callseq 11 + mov.u32 %r5, -1; + mov.b32 %r6, %r5; + mov.b32 %r7, %r6; + ld.u64 %rd4, [%SP+0]; + st.u32 [%rd4+8], %r7; + ld.u64 %rd5, [%SP+0]; + ld.u32 %r8, [%rd5]; + and.b32 %r9, %r8, 255; + or.b32 %r10, %r9, 8192; + st.u32 [%rd5], %r10; + ld.u64 %rd6, [%SP+0]; + and.b32 %r11, %r3, 65535; + ld.u32 %r12, [%rd6+4]; + and.b32 %r13, %r12, 65535; + shl.b32 %r14, %r11, 16; + or.b32 %r15, %r13, %r14; + st.u32 [%rd6+4], %r15; + ld.u64 %rd7, [%SP+0]; + and.b32 %r16, %r4, 65535; + ld.u32 %r17, [%rd7+4]; + and.b32 %r18, %r17, -65536; + or.b32 %r19, %r18, %r16; + st.u32 [%rd7+4], %r19; + ld.u64 %rd8, [%SP+0]; + ld.u32 %r20, [%rd8]; + and.b32 %r21, %r20, -2; + or.b32 %r22, %r21, 1; + st.u32 [%rd8], %r22; + ret; + +} + // .weak _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE +.weak .func 
_ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE( + .param .b64 _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE_param_0, + .param .b64 _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE_param_1 +) +{ + .local .align 8 .b8 __local_depot7[80]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b32 %r<40>; + .reg .b64 %rd<7>; + + + mov.u64 %SPL, __local_depot7; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE_param_0]; + ld.param.u64 %rd2, [_ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE_param_1]; + mov.b64 %rd3, %rd1; + st.u64 [%SP+64], %rd3; + mov.b64 %rd4, %rd2; + st.u64 [%SP+72], %rd4; + ld.u64 %rd5, [%SP+64]; + mov.u32 %r1, %tid.x; + st.u32 [%SP+36], %r1; + mov.u32 %r2, %tid.y; + st.u32 [%SP+40], %r2; + mov.u32 %r3, %tid.z; + st.u32 [%SP+44], %r3; + ld.u32 %r4, [%SP+44]; + ld.u32 %r5, [%SP+40]; + ld.u32 %r6, [%SP+36]; + add.u64 %rd6, %SP, 24; + { // callseq 12, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd6; + .param .align 4 .b8 param1[12]; + st.param.b32 [param1+0], %r6; + st.param.b32 [param1+4], %r5; + st.param.b32 [param1+8], %r4; + call.uni + _ZN4dim3C1E5uint3, + ( + param0, + param1 + ); + } // callseq 12 + ld.u32 %r7, [%SP+24]; + ld.u32 %r8, [%SP+28]; + ld.u32 %r9, [%SP+32]; + mov.u32 %r10, %ntid.x; + st.u32 [%SP+48], %r10; + mov.u32 %r11, %ntid.y; + st.u32 [%SP+52], %r11; + mov.u32 %r12, %ntid.z; + st.u32 [%SP+56], %r12; + ld.u32 %r13, [%SP+48]; + ld.u32 %r14, [%SP+52]; + ld.u32 %r15, [%SP+56]; + st.u32 [%SP+8], %r9; + st.u32 [%SP+4], %r8; + st.u32 [%SP+0], %r7; + st.u32 [%SP+20], %r15; + 
st.u32 [%SP+16], %r14; + st.u32 [%SP+12], %r13; + ld.u32 %r16, [%SP+8]; + ld.u32 %r17, [%SP+16]; + mul.lo.s32 %r18, %r16, %r17; + ld.u32 %r19, [%SP+12]; + mul.lo.s32 %r20, %r18, %r19; + ld.u32 %r21, [%SP+4]; + ld.u32 %r22, [%SP+12]; + mul.lo.s32 %r23, %r21, %r22; + add.s32 %r24, %r20, %r23; + ld.u32 %r25, [%SP+0]; + add.s32 %r26, %r24, %r25; + mov.b32 %r27, %r26; + mov.b32 %r28, %r27; + div.u32 %r29, %r28, 32; + mov.u32 %r30, %ntid.x; + mov.u32 %r31, %ntid.y; + mul.lo.s32 %r32, %r30, %r31; + mov.u32 %r33, %ntid.z; + mul.lo.s32 %r34, %r32, %r33; + mov.b32 %r35, %r34; + mov.b32 %r36, %r35; + add.s32 %r37, %r36, 32; + sub.s32 %r38, %r37, 1; + div.u32 %r39, %r38, 32; + { // callseq 13, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd5; + .param .b32 param1; + st.param.b32 [param1+0], %r29; + .param .b32 param2; + st.param.b32 [param2+0], %r39; + call.uni + _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj, + ( + param0, + param1, + param2 + ); + } // callseq 13 + ret; + +} + // .weak _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32ENS0_12thread_blockEEC2Ev +.weak .func _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32ENS0_12thread_blockEEC2Ev( + .param .b64 _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32ENS0_12thread_blockEEC2Ev_param_0 +) +{ + .local .align 8 .b8 __local_depot8[8]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b64 %rd<3>; + + + mov.u64 %SPL, __local_depot8; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32ENS0_12thread_blockEEC2Ev_param_0]; + mov.b64 %rd2, %rd1; + st.u64 [%SP+0], %rd2; + ret; + +} + // .weak _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32ENS0_12thread_blockELb0EEC2ERKS3_ +.weak .func _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32ENS0_12thread_blockELb0EEC2ERKS3_( + .param .b64 
_ZN18cooperative_groups4__v17details22thread_block_tile_implILj32ENS0_12thread_blockELb0EEC2ERKS3__param_0, + .param .b64 _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32ENS0_12thread_blockELb0EEC2ERKS3__param_1 +) +{ + .local .align 8 .b8 __local_depot9[16]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b64 %rd<6>; + + + mov.u64 %SPL, __local_depot9; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN18cooperative_groups4__v17details22thread_block_tile_implILj32ENS0_12thread_blockELb0EEC2ERKS3__param_0]; + ld.param.u64 %rd2, [_ZN18cooperative_groups4__v17details22thread_block_tile_implILj32ENS0_12thread_blockELb0EEC2ERKS3__param_1]; + mov.b64 %rd3, %rd1; + st.u64 [%SP+0], %rd3; + mov.b64 %rd4, %rd2; + st.u64 [%SP+8], %rd4; + ld.u64 %rd5, [%SP+0]; + { // callseq 14, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd5; + call.uni + _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32ENS0_12thread_blockEEC2Ev, + ( + param0 + ); + } // callseq 14 + ret; + +} + // .weak _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2_ +.weak .func _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2_( + .param .b64 _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2__param_0, + .param .b64 _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2__param_1 +) +{ + .local .align 8 .b8 __local_depot10[16]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b64 %rd<7>; + + + mov.u64 %SPL, __local_depot10; + cvta.local.u64 %SP, %SPL; + ld.param.u64 %rd1, [_ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2__param_0]; + ld.param.u64 %rd2, [_ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2__param_1]; + mov.b64 %rd3, %rd1; + st.u64 [%SP+0], %rd3; + mov.b64 %rd4, %rd2; + st.u64 [%SP+8], %rd4; + ld.u64 %rd5, [%SP+0]; + ld.u64 %rd6, [%SP+8]; + { // callseq 15, 0 + .reg .b32 
temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd5; + .param .b64 param1; + st.param.b64 [param1+0], %rd6; + call.uni + _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32ENS0_12thread_blockELb0EEC2ERKS3_, + ( + param0, + param1 + ); + } // callseq 15 + ret; + +} +.func (.param .b64 func_retval0) _Z9atomicAddPyy( + .param .b64 _Z9atomicAddPyy_param_0, + .param .b64 _Z9atomicAddPyy_param_1 +) +{ + .reg .b64 %rd<4>; + + + ld.param.u64 %rd1, [_Z9atomicAddPyy_param_0]; + ld.param.u64 %rd2, [_Z9atomicAddPyy_param_1]; + { // callseq 16, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd1; + .param .b64 param1; + st.param.b64 [param1+0], %rd2; + .param .b64 retval0; + call.uni (retval0), + __ullAtomicAdd, + ( + param0, + param1 + ); + ld.param.b64 %rd3, [retval0+0]; + } // callseq 16 + st.param.b64 [func_retval0+0], %rd3; + ret; + +} +.func (.param .b32 func_retval0) _Z13__ballot_syncji( + .param .b32 _Z13__ballot_syncji_param_0, + .param .b32 _Z13__ballot_syncji_param_1 +) +{ + .reg .pred %p<3>; + .reg .b32 %r<4>; + + + ld.param.u32 %r1, [_Z13__ballot_syncji_param_0]; + ld.param.u32 %r2, [_Z13__ballot_syncji_param_1]; + setp.ne.s32 %p1, %r2, 0; + vote.sync.ballot.b32 %r3, %p1, %r1; + st.param.b32 [func_retval0+0], %r3; + ret; + +} +.func (.param .b32 func_retval0) _Z16__shfl_down_syncjiji( + .param .b32 _Z16__shfl_down_syncjiji_param_0, + .param .b32 _Z16__shfl_down_syncjiji_param_1, + .param .b32 _Z16__shfl_down_syncjiji_param_2, + .param .b32 _Z16__shfl_down_syncjiji_param_3 +) +{ + .reg .pred %p<2>; + .reg .b32 %r<10>; + + + ld.param.u32 %r1, [_Z16__shfl_down_syncjiji_param_0]; + ld.param.u32 %r2, [_Z16__shfl_down_syncjiji_param_1]; + ld.param.u32 %r3, [_Z16__shfl_down_syncjiji_param_2]; + ld.param.u32 %r4, [_Z16__shfl_down_syncjiji_param_3]; + mov.u32 %r5, 32; + sub.s32 %r6, %r5, %r4; + shl.b32 %r7, %r6, 8; + or.b32 %r8, %r7, 31; + shfl.sync.down.b32 %r9|%p1, %r2, %r3, %r8, %r1; + st.param.b32 
[func_retval0+0], %r9; + ret; + +} +.func (.param .b64 func_retval0) __ullAtomicAdd( + .param .b64 __ullAtomicAdd_param_0, + .param .b64 __ullAtomicAdd_param_1 +) +{ + .reg .b64 %rd<4>; + + + ld.param.u64 %rd1, [__ullAtomicAdd_param_0]; + ld.param.u64 %rd2, [__ullAtomicAdd_param_1]; + atom.add.u64 %rd3, [%rd1], %rd2; + st.param.b64 [func_retval0+0], %rd3; + ret; + +} + .file 1 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/GB_opaque.h" + .file 2 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/matrix.h" + .file 3 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/cooperative_groups.h" + .file 4 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/cooperative_groups/details/info.h" + .file 5 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/cooperative_groups/details/helpers.h" + .file 6 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/cstdint" + .file 7 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/__nv_nvrtc_builtin_header.h" + .file 8 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/GB_matrix.h" + .file 9 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/cooperative_groups/details/driver_abi.h" + .file 10 "/share/workspace/nvidia_projects/GraphBLAS/CUDA/test/GB_jit_AxB_dot3_phase3_mp.cuh" + .section .debug_loc + { +.b64 $L__tmp10 +.b64 $L__tmp29 +.b8 5 +.b8 0 +.b8 144 +.b8 177 +.b8 228 +.b8 149 +.b8 1 +.b64 $L__tmp29 +.b64 $L__tmp31 +.b8 7 +.b8 0 +.b8 144 +.b8 182 +.b8 226 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp31 +.b64 $L__tmp161 +.b8 5 +.b8 0 +.b8 144 +.b8 180 +.b8 228 +.b8 149 +.b8 1 +.b64 $L__tmp161 +.b64 $L__tmp164 +.b8 7 +.b8 0 +.b8 144 +.b8 176 +.b8 232 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp164 +.b64 $L__tmp166 +.b8 7 +.b8 0 +.b8 144 +.b8 185 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp166 +.b64 $L__tmp167 +.b8 6 +.b8 0 +.b8 144 +.b8 180 +.b8 236 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp167 +.b64 $L__tmp168 +.b8 7 +.b8 0 +.b8 144 +.b8 185 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 
$L__tmp168 +.b64 $L__tmp169 +.b8 6 +.b8 0 +.b8 144 +.b8 181 +.b8 236 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp169 +.b64 $L__tmp171 +.b8 7 +.b8 0 +.b8 144 +.b8 176 +.b8 232 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp171 +.b64 $L__tmp173 +.b8 6 +.b8 0 +.b8 144 +.b8 182 +.b8 236 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp173 +.b64 $L__func_end0 +.b8 7 +.b8 0 +.b8 144 +.b8 182 +.b8 226 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 0 +.b64 0 +.b64 $L__tmp14 +.b64 $L__tmp37 +.b8 7 +.b8 0 +.b8 144 +.b8 182 +.b8 232 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 $L__tmp37 +.b64 $L__tmp49 +.b8 7 +.b8 0 +.b8 144 +.b8 181 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 $L__tmp49 +.b64 $L__tmp79 +.b8 8 +.b8 0 +.b8 144 +.b8 178 +.b8 228 +.b8 200 +.b8 161 +.b8 166 +.b8 174 +.b8 9 +.b64 $L__tmp79 +.b64 $L__func_end0 +.b8 8 +.b8 0 +.b8 144 +.b8 179 +.b8 228 +.b8 200 +.b8 161 +.b8 166 +.b8 174 +.b8 9 +.b64 0 +.b64 0 +.b64 $L__tmp15 +.b64 $L__tmp40 +.b8 7 +.b8 0 +.b8 144 +.b8 183 +.b8 232 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 $L__tmp40 +.b64 $L__func_end0 +.b8 7 +.b8 0 +.b8 144 +.b8 184 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 0 +.b64 0 +.b64 $L__tmp16 +.b64 $L__tmp41 +.b8 7 +.b8 0 +.b8 144 +.b8 184 +.b8 232 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 $L__tmp41 +.b64 $L__func_end0 +.b8 8 +.b8 0 +.b8 144 +.b8 182 +.b8 224 +.b8 196 +.b8 161 +.b8 166 +.b8 174 +.b8 9 +.b64 0 +.b64 0 +.b64 $L__tmp28 +.b64 $L__tmp30 +.b8 7 +.b8 0 +.b8 144 +.b8 176 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 $L__tmp30 +.b64 $L__tmp31 +.b8 8 +.b8 0 +.b8 144 +.b8 185 +.b8 226 +.b8 200 +.b8 161 +.b8 166 +.b8 174 +.b8 9 +.b64 $L__tmp31 +.b64 $L__tmp172 +.b8 7 +.b8 0 +.b8 144 +.b8 177 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 $L__tmp172 +.b64 $L__tmp174 +.b8 7 +.b8 0 +.b8 144 +.b8 176 +.b8 230 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 $L__tmp174 +.b64 $L__func_end0 +.b8 8 +.b8 0 +.b8 144 +.b8 185 +.b8 226 +.b8 200 +.b8 161 +.b8 166 +.b8 174 +.b8 9 +.b64 0 +.b64 0 +.b64 $L__tmp42 +.b64 $L__tmp44 +.b8 7 +.b8 0 
+.b8 144 +.b8 177 +.b8 228 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b64 $L__tmp44 +.b64 $L__tmp46 +.b8 8 +.b8 0 +.b8 144 +.b8 176 +.b8 228 +.b8 200 +.b8 161 +.b8 166 +.b8 174 +.b8 9 +.b64 $L__tmp46 +.b64 $L__func_end0 +.b8 8 +.b8 0 +.b8 144 +.b8 177 +.b8 228 +.b8 200 +.b8 161 +.b8 166 +.b8 174 +.b8 9 +.b64 0 +.b64 0 +.b64 $L__tmp45 +.b64 $L__tmp70 +.b8 5 +.b8 0 +.b8 144 +.b8 182 +.b8 228 +.b8 149 +.b8 1 +.b64 $L__tmp70 +.b64 $L__tmp72 +.b8 7 +.b8 0 +.b8 144 +.b8 178 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp72 +.b64 $L__tmp73 +.b8 6 +.b8 0 +.b8 144 +.b8 176 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp73 +.b64 $L__tmp75 +.b8 7 +.b8 0 +.b8 144 +.b8 178 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp75 +.b64 $L__func_end0 +.b8 6 +.b8 0 +.b8 144 +.b8 177 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b64 0 +.b64 0 +.b64 $L__tmp48 +.b64 $L__tmp51 +.b8 5 +.b8 0 +.b8 144 +.b8 185 +.b8 228 +.b8 149 +.b8 1 +.b64 $L__tmp51 +.b64 $L__tmp53 +.b8 7 +.b8 0 +.b8 144 +.b8 184 +.b8 226 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp53 +.b64 $L__tmp57 +.b8 6 +.b8 0 +.b8 144 +.b8 177 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp57 +.b64 $L__tmp58 +.b8 6 +.b8 0 +.b8 144 +.b8 180 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp58 +.b64 $L__tmp64 +.b8 7 +.b8 0 +.b8 144 +.b8 176 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp64 +.b64 $L__tmp65 +.b8 6 +.b8 0 +.b8 144 +.b8 182 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp65 +.b64 $L__tmp78 +.b8 7 +.b8 0 +.b8 144 +.b8 184 +.b8 226 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp78 +.b64 $L__tmp81 +.b8 6 +.b8 0 +.b8 144 +.b8 181 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp81 +.b64 $L__tmp83 +.b8 7 +.b8 0 +.b8 144 +.b8 180 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp83 +.b64 $L__tmp87 +.b8 6 +.b8 0 +.b8 144 +.b8 183 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp87 +.b64 $L__tmp88 +.b8 6 +.b8 0 +.b8 144 +.b8 176 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp88 +.b64 $L__tmp94 +.b8 7 +.b8 0 +.b8 144 +.b8 
182 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp94 +.b64 $L__tmp95 +.b8 6 +.b8 0 +.b8 144 +.b8 178 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp95 +.b64 $L__func_end0 +.b8 7 +.b8 0 +.b8 144 +.b8 180 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 0 +.b64 0 +.b64 $L__tmp50 +.b64 $L__tmp52 +.b8 6 +.b8 0 +.b8 144 +.b8 176 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp52 +.b64 $L__tmp53 +.b8 7 +.b8 0 +.b8 144 +.b8 185 +.b8 226 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp53 +.b64 $L__tmp59 +.b8 6 +.b8 0 +.b8 144 +.b8 178 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp59 +.b64 $L__tmp61 +.b8 7 +.b8 0 +.b8 144 +.b8 177 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp61 +.b64 $L__tmp63 +.b8 6 +.b8 0 +.b8 144 +.b8 181 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp63 +.b64 $L__tmp64 +.b8 7 +.b8 0 +.b8 144 +.b8 177 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp64 +.b64 $L__tmp66 +.b8 6 +.b8 0 +.b8 144 +.b8 183 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp66 +.b64 $L__tmp80 +.b8 7 +.b8 0 +.b8 144 +.b8 185 +.b8 226 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp80 +.b64 $L__tmp82 +.b8 6 +.b8 0 +.b8 144 +.b8 182 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp82 +.b64 $L__tmp83 +.b8 7 +.b8 0 +.b8 144 +.b8 181 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp83 +.b64 $L__tmp89 +.b8 6 +.b8 0 +.b8 144 +.b8 184 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp89 +.b64 $L__tmp91 +.b8 7 +.b8 0 +.b8 144 +.b8 183 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp91 +.b64 $L__tmp93 +.b8 6 +.b8 0 +.b8 144 +.b8 177 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp93 +.b64 $L__tmp94 +.b8 7 +.b8 0 +.b8 144 +.b8 183 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp94 +.b64 $L__tmp96 +.b8 6 +.b8 0 +.b8 144 +.b8 179 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp96 +.b64 $L__func_end0 +.b8 7 +.b8 0 +.b8 144 +.b8 181 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 0 +.b64 0 +.b64 $L__tmp68 +.b64 $L__tmp98 +.b8 6 +.b8 0 +.b8 144 +.b8 184 
+.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp98 +.b64 $L__func_end0 +.b8 6 +.b8 0 +.b8 144 +.b8 180 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b64 0 +.b64 0 +.b64 $L__tmp69 +.b64 $L__tmp99 +.b8 6 +.b8 0 +.b8 144 +.b8 185 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp99 +.b64 $L__func_end0 +.b8 6 +.b8 0 +.b8 144 +.b8 181 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b64 0 +.b64 0 +.b64 $L__tmp104 +.b64 $L__tmp108 +.b8 6 +.b8 0 +.b8 144 +.b8 184 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp108 +.b64 $L__tmp112 +.b8 7 +.b8 0 +.b8 144 +.b8 184 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp112 +.b64 $L__tmp119 +.b8 6 +.b8 0 +.b8 144 +.b8 178 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp119 +.b64 $L__tmp120 +.b8 6 +.b8 0 +.b8 144 +.b8 184 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp120 +.b64 $L__tmp124 +.b8 7 +.b8 0 +.b8 144 +.b8 178 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp124 +.b64 $L__tmp125 +.b8 6 +.b8 0 +.b8 144 +.b8 176 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp125 +.b64 $L__tmp130 +.b8 7 +.b8 0 +.b8 144 +.b8 178 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp130 +.b64 $L__tmp131 +.b8 6 +.b8 0 +.b8 144 +.b8 177 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp131 +.b64 $L__tmp142 +.b8 7 +.b8 0 +.b8 144 +.b8 180 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp142 +.b64 $L__tmp143 +.b8 6 +.b8 0 +.b8 144 +.b8 183 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp143 +.b64 $L__tmp155 +.b8 7 +.b8 0 +.b8 144 +.b8 184 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp155 +.b64 $L__tmp157 +.b8 7 +.b8 0 +.b8 144 +.b8 184 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp157 +.b64 $L__tmp158 +.b8 6 +.b8 0 +.b8 144 +.b8 178 +.b8 236 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp158 +.b64 $L__tmp160 +.b8 7 +.b8 0 +.b8 144 +.b8 184 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp160 +.b64 $L__func_end0 +.b8 6 +.b8 0 +.b8 144 +.b8 179 +.b8 236 +.b8 200 +.b8 171 +.b8 2 +.b64 0 +.b64 0 +.b64 $L__tmp105 +.b64 $L__tmp109 +.b8 6 +.b8 0 
+.b8 144 +.b8 185 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp109 +.b64 $L__tmp112 +.b8 7 +.b8 0 +.b8 144 +.b8 185 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp112 +.b64 $L__tmp121 +.b8 6 +.b8 0 +.b8 144 +.b8 179 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp121 +.b64 $L__tmp123 +.b8 7 +.b8 0 +.b8 144 +.b8 179 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp123 +.b64 $L__tmp126 +.b8 6 +.b8 0 +.b8 144 +.b8 185 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp126 +.b64 $L__tmp128 +.b8 7 +.b8 0 +.b8 144 +.b8 179 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp128 +.b64 $L__tmp132 +.b8 6 +.b8 0 +.b8 144 +.b8 178 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp132 +.b64 $L__tmp142 +.b8 7 +.b8 0 +.b8 144 +.b8 181 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp142 +.b64 $L__tmp144 +.b8 6 +.b8 0 +.b8 144 +.b8 184 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp144 +.b64 $L__tmp154 +.b8 7 +.b8 0 +.b8 144 +.b8 185 +.b8 228 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp154 +.b64 $L__func_end0 +.b8 6 +.b8 0 +.b8 144 +.b8 177 +.b8 236 +.b8 200 +.b8 171 +.b8 2 +.b64 0 +.b64 0 +.b64 $L__tmp106 +.b64 $L__tmp110 +.b8 6 +.b8 0 +.b8 144 +.b8 176 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp110 +.b64 $L__tmp112 +.b8 7 +.b8 0 +.b8 144 +.b8 176 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp112 +.b64 $L__tmp129 +.b8 6 +.b8 0 +.b8 144 +.b8 180 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp129 +.b64 $L__tmp133 +.b8 6 +.b8 0 +.b8 144 +.b8 179 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp133 +.b64 $L__tmp136 +.b8 7 +.b8 0 +.b8 144 +.b8 182 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp136 +.b64 $L__tmp140 +.b8 6 +.b8 0 +.b8 144 +.b8 181 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp140 +.b64 $L__tmp142 +.b8 7 +.b8 0 +.b8 144 +.b8 182 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp142 +.b64 $L__tmp145 +.b8 6 +.b8 0 +.b8 144 +.b8 185 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp145 +.b64 $L__func_end0 +.b8 7 +.b8 0 +.b8 
144 +.b8 176 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 0 +.b64 0 +.b64 $L__tmp107 +.b64 $L__tmp111 +.b8 6 +.b8 0 +.b8 144 +.b8 177 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp111 +.b64 $L__tmp112 +.b8 7 +.b8 0 +.b8 144 +.b8 177 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp112 +.b64 $L__tmp130 +.b8 6 +.b8 0 +.b8 144 +.b8 181 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp130 +.b64 $L__tmp134 +.b8 6 +.b8 0 +.b8 144 +.b8 180 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp134 +.b64 $L__tmp137 +.b8 7 +.b8 0 +.b8 144 +.b8 183 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp137 +.b64 $L__tmp141 +.b8 6 +.b8 0 +.b8 144 +.b8 182 +.b8 234 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp141 +.b64 $L__tmp142 +.b8 7 +.b8 0 +.b8 144 +.b8 183 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 $L__tmp142 +.b64 $L__tmp146 +.b8 6 +.b8 0 +.b8 144 +.b8 176 +.b8 236 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp146 +.b64 $L__func_end0 +.b8 7 +.b8 0 +.b8 144 +.b8 177 +.b8 230 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b64 0 +.b64 0 +.b64 $L__tmp180 +.b64 $L__tmp181 +.b8 5 +.b8 0 +.b8 144 +.b8 177 +.b8 228 +.b8 149 +.b8 1 +.b64 $L__tmp181 +.b64 $L__tmp182 +.b8 6 +.b8 0 +.b8 144 +.b8 179 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b64 $L__tmp182 +.b64 $L__tmp199 +.b8 5 +.b8 0 +.b8 144 +.b8 178 +.b8 228 +.b8 149 +.b8 1 +.b64 $L__tmp199 +.b64 $L__tmp200 +.b8 5 +.b8 0 +.b8 144 +.b8 179 +.b8 228 +.b8 149 +.b8 1 +.b64 $L__tmp200 +.b64 $L__func_end1 +.b8 6 +.b8 0 +.b8 144 +.b8 179 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b64 0 +.b64 0 + } + .section .debug_abbrev + { +.b8 1 +.b8 17 +.b8 1 +.b8 37 +.b8 8 +.b8 19 +.b8 5 +.b8 3 +.b8 8 +.b8 16 +.b8 6 +.b8 27 +.b8 8 +.b8 17 +.b8 1 +.b8 0 +.b8 0 +.b8 2 +.b8 52 +.b8 0 +.b8 3 +.b8 8 +.b8 73 +.b8 19 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 51 +.b8 11 +.b8 2 +.b8 10 +.b8 135,64 +.b8 8 +.b8 0 +.b8 0 +.b8 3 +.b8 22 +.b8 0 +.b8 73 +.b8 19 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 0 +.b8 0 +.b8 4 +.b8 15 +.b8 0 +.b8 73 +.b8 19 +.b8 51 +.b8 6 +.b8 0 +.b8 0 +.b8 5 
+.b8 19 +.b8 0 +.b8 3 +.b8 8 +.b8 11 +.b8 11 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 0 +.b8 0 +.b8 6 +.b8 52 +.b8 0 +.b8 3 +.b8 8 +.b8 73 +.b8 19 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 51 +.b8 11 +.b8 2 +.b8 10 +.b8 135,64 +.b8 8 +.b8 0 +.b8 0 +.b8 7 +.b8 38 +.b8 0 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 8 +.b8 36 +.b8 0 +.b8 3 +.b8 8 +.b8 62 +.b8 11 +.b8 11 +.b8 11 +.b8 0 +.b8 0 +.b8 9 +.b8 19 +.b8 1 +.b8 3 +.b8 8 +.b8 11 +.b8 11 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 0 +.b8 0 +.b8 10 +.b8 13 +.b8 0 +.b8 3 +.b8 8 +.b8 73 +.b8 19 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 56 +.b8 10 +.b8 0 +.b8 0 +.b8 11 +.b8 13 +.b8 0 +.b8 3 +.b8 8 +.b8 73 +.b8 19 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 56 +.b8 10 +.b8 0 +.b8 0 +.b8 12 +.b8 59 +.b8 0 +.b8 3 +.b8 8 +.b8 0 +.b8 0 +.b8 13 +.b8 22 +.b8 0 +.b8 73 +.b8 19 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 0 +.b8 0 +.b8 14 +.b8 19 +.b8 1 +.b8 3 +.b8 8 +.b8 11 +.b8 11 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 0 +.b8 0 +.b8 15 +.b8 13 +.b8 0 +.b8 3 +.b8 8 +.b8 73 +.b8 19 +.b8 56 +.b8 10 +.b8 0 +.b8 0 +.b8 16 +.b8 46 +.b8 1 +.b8 135,64 +.b8 8 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 73 +.b8 19 +.b8 32 +.b8 11 +.b8 0 +.b8 0 +.b8 17 +.b8 5 +.b8 0 +.b8 3 +.b8 8 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 18 +.b8 46 +.b8 0 +.b8 135,64 +.b8 8 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 73 +.b8 19 +.b8 32 +.b8 11 +.b8 0 +.b8 0 +.b8 19 +.b8 5 +.b8 0 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 20 +.b8 16 +.b8 0 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 21 +.b8 11 +.b8 1 +.b8 0 +.b8 0 +.b8 22 +.b8 52 +.b8 0 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 23 +.b8 46 +.b8 1 +.b8 17 +.b8 1 +.b8 18 +.b8 1 +.b8 64 +.b8 10 +.b8 135,64 +.b8 8 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 73 +.b8 19 +.b8 63 +.b8 12 +.b8 0 +.b8 0 +.b8 24 +.b8 5 +.b8 0 +.b8 2 +.b8 10 +.b8 51 +.b8 11 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 25 +.b8 11 +.b8 1 +.b8 17 +.b8 1 +.b8 
18 +.b8 1 +.b8 0 +.b8 0 +.b8 26 +.b8 52 +.b8 0 +.b8 51 +.b8 11 +.b8 2 +.b8 10 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 27 +.b8 52 +.b8 0 +.b8 2 +.b8 10 +.b8 51 +.b8 11 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 28 +.b8 52 +.b8 0 +.b8 2 +.b8 6 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 29 +.b8 23 +.b8 1 +.b8 3 +.b8 8 +.b8 11 +.b8 11 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 0 +.b8 0 +.b8 30 +.b8 13 +.b8 0 +.b8 3 +.b8 8 +.b8 73 +.b8 19 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 11 +.b8 11 +.b8 13 +.b8 11 +.b8 12 +.b8 15 +.b8 56 +.b8 10 +.b8 0 +.b8 0 +.b8 31 +.b8 13 +.b8 0 +.b8 73 +.b8 19 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 11 +.b8 11 +.b8 13 +.b8 11 +.b8 12 +.b8 15 +.b8 56 +.b8 10 +.b8 0 +.b8 0 +.b8 32 +.b8 19 +.b8 0 +.b8 3 +.b8 8 +.b8 11 +.b8 11 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 0 +.b8 0 +.b8 33 +.b8 29 +.b8 1 +.b8 49 +.b8 19 +.b8 17 +.b8 1 +.b8 18 +.b8 1 +.b8 88 +.b8 11 +.b8 89 +.b8 11 +.b8 0 +.b8 0 +.b8 34 +.b8 29 +.b8 1 +.b8 49 +.b8 19 +.b8 17 +.b8 1 +.b8 18 +.b8 1 +.b8 88 +.b8 11 +.b8 89 +.b8 5 +.b8 0 +.b8 0 +.b8 35 +.b8 5 +.b8 0 +.b8 51 +.b8 11 +.b8 2 +.b8 10 +.b8 49 +.b8 19 +.b8 0 +.b8 0 +.b8 36 +.b8 5 +.b8 0 +.b8 2 +.b8 10 +.b8 51 +.b8 11 +.b8 49 +.b8 19 +.b8 0 +.b8 0 +.b8 37 +.b8 52 +.b8 0 +.b8 2 +.b8 10 +.b8 51 +.b8 11 +.b8 49 +.b8 19 +.b8 0 +.b8 0 +.b8 38 +.b8 46 +.b8 1 +.b8 135,64 +.b8 8 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 73 +.b8 19 +.b8 32 +.b8 11 +.b8 0 +.b8 0 +.b8 39 +.b8 5 +.b8 0 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 40 +.b8 5 +.b8 0 +.b8 51 +.b8 11 +.b8 2 +.b8 10 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 11 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 41 +.b8 46 +.b8 1 +.b8 17 +.b8 1 +.b8 18 +.b8 1 +.b8 64 +.b8 10 +.b8 135,64 +.b8 8 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 73 +.b8 19 +.b8 63 +.b8 12 +.b8 0 +.b8 0 +.b8 42 +.b8 5 +.b8 0 +.b8 2 +.b8 10 +.b8 51 +.b8 11 +.b8 3 +.b8 8 +.b8 
73 +.b8 19 +.b8 0 +.b8 0 +.b8 43 +.b8 5 +.b8 0 +.b8 51 +.b8 11 +.b8 2 +.b8 10 +.b8 3 +.b8 8 +.b8 58 +.b8 11 +.b8 59 +.b8 5 +.b8 73 +.b8 19 +.b8 0 +.b8 0 +.b8 0 + } + .section .debug_info + { +.b32 16706 +.b8 2 +.b8 0 +.b32 .debug_abbrev +.b8 8 +.b8 1 +.b8 108,103,101,110,102,101,58,32,69,68,71,32,54,46,50 +.b8 0 +.b8 4 +.b8 0 +.b8 71,66,95,106,105,116,95,65,120,66,95,100,111,116,51,95,112,104,97,115,101,51,95,109,112 +.b8 0 +.b32 .debug_line +.b8 47,115,104,97,114,101,47,119,111,114,107,115,112,97,99,101,47,110,118,105,100,105,97,95,112,114,111,106,101,99,116,115,47,71,114,97,112,104,66,76 +.b8 65,83,47,67,85,68,65,47,116,101,115,116 +.b8 0 +.b64 0 +.b8 2 +.b8 71,120,66,95,70,76,73,80,68,73,65,71,73,78,68,69,88,95,73,78,84,51,50 +.b8 0 +.b32 188 +.b8 1 +.b8 110 +.b8 1 +.b8 5 +.b8 9 +.b8 3 +.b64 GxB_FLIPDIAGINDEX_INT32 +.b8 71,120,66,95,70,76,73,80,68,73,65,71,73,78,68,69,88,95,73,78,84,51,50 +.b8 0 +.b8 3 +.b32 212 +.b8 71,114,66,95,73,110,100,101,120,85,110,97,114,121,79,112 +.b8 0 +.b8 2 +.b8 71 +.b8 4 +.b32 221 +.b32 12 +.b8 5 +.b8 71,66,95,73,110,100,101,120,85,110,97,114,121,79,112,95,111,112,97,113,117,101 +.b8 0 +.b8 0 +.b8 1 +.b8 144 +.b8 1 +.b8 2 +.b8 71,120,66,95,70,76,73,80,68,73,65,71,73,78,68,69,88,95,73,78,84,54,52 +.b8 0 +.b32 188 +.b8 1 +.b8 110 +.b8 1 +.b8 5 +.b8 9 +.b8 3 +.b64 GxB_FLIPDIAGINDEX_INT64 +.b8 71,120,66,95,70,76,73,80,68,73,65,71,73,78,68,69,88,95,73,78,84,54,52 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,95,98,97,115,101,73,76,106,51,69,69,50,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 164 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v117thread_group_baseILj3EE2idE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,95,98,97,115,101,73,76,106,51,69,69,50,105,100,69 +.b8 0 +.b8 7 +.b32 455 +.b8 
8 +.b8 117,110,115,105,103,110,101,100,32,105,110,116 +.b8 0 +.b8 7 +.b8 4 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,48,103,114,105,100,95,103,114,111,117,112 +.b8 57,95,103,114,111,117,112,95,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 68 +.b8 1 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v110grid_group9_group_idE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,48,103,114,105,100,95,103,114,111,117,112 +.b8 57,95,103,114,111,117,112,95,105,100,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,95,98,97,115,101,73,76,106,52,69,69,50,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 164 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v117thread_group_baseILj4EE2idE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,95,98,97,115,101,73,76,106,52,69,69,50,105,100,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,57,95,103,114,111,117,112,95,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 125 +.b8 2 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v112thread_block9_group_idE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,57,95,103,114,111,117,112,95,105,100,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,95,98,97,115,101,73,76,106,49,69,69,50,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 164 +.b8 5 +.b8 9 +.b8 3 +.b64 
_ZN18cooperative_groups4__v117thread_group_baseILj1EE2idE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,95,98,97,115,101,73,76,106,49,69,69,50,105,100,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,53,99,111,97,108,101,115,99,101,100,95 +.b8 103,114,111,117,112,57,95,103,114,111,117,112,95,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 25 +.b8 3 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v115coalesced_group9_group_idE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,53,99,111,97,108,101,115,99,101,100,95 +.b8 103,114,111,117,112,57,95,103,114,111,117,112,95,105,100,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,50,51,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,105,108,101,95,98,97,115,101,73,76,106,49,69,69,49,48,110,117,109,84,104,114,101,97,100,115,69 +.b8 0 +.b32 450 +.b8 3 +.b8 208 +.b8 3 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v123__static_size_tile_baseILj1EE10numThreadsE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,50,51,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,105,108,101,95,98,97,115,101,73,76,106,49,69,69,49,48,110,117,109,84,104,114,101,97,100,115,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,49,69,69,56,102,117,108,108,77,97,115 +.b8 107,69 +.b8 0 +.b32 450 +.b8 3 +.b8 240 +.b8 3 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v136__static_size_thread_block_tile_baseILj1EE8fullMaskE +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,49,69,69,56,102,117,108,108,77,97,115 +.b8 107,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,49,69,69,57,95,103,114,111,117,112,95 +.b8 105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 255 +.b8 3 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v136__static_size_thread_block_tile_baseILj1EE9_group_idE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,49,69,69,57,95,103,114,111,117,112,95 +.b8 105,100,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,49,95,95,115,105,110,103,108,101,95,119 +.b8 97,114,112,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,76,106,49,69,118,69,49,48,110,117,109,84,104,114,101,97,100,115 +.b8 69 +.b8 0 +.b32 450 +.b8 3 +.b8 135 +.b8 4 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj1EvE10numThreadsE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,49,95,95,115,105,110,103,108,101,95,119 +.b8 97,114,112,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,76,106,49,69,118,69,49,48,110,117,109,84,104,114,101,97,100,115 +.b8 69 +.b8 0 +.b8 2 +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,50,51,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,49,48,110,117,109,84,104,114,101,97,100,115,69 +.b8 0 +.b32 450 +.b8 3 +.b8 208 +.b8 3 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v123__static_size_tile_baseILj32EE10numThreadsE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,50,51,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,49,48,110,117,109,84,104,114,101,97,100,115,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,56,102,117,108,108,77,97 +.b8 115,107,69 +.b8 0 +.b32 450 +.b8 3 +.b8 240 +.b8 3 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v136__static_size_thread_block_tile_baseILj32EE8fullMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,56,102,117,108,108,77,97 +.b8 115,107,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,57,95,103,114,111,117,112 +.b8 95,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 255 +.b8 3 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v136__static_size_thread_block_tile_baseILj32EE9_group_idE +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,57,95,103,114,111,117,112 +.b8 95,105,100,69 +.b8 0 +.b8 2 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,49,95,95,115,105,110,103,108,101,95,119 +.b8 97,114,112,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,76,106,51,50,69,118,69,49,48,110,117,109,84,104,114,101,97,100 +.b8 115,69 +.b8 0 +.b32 450 +.b8 3 +.b8 135 +.b8 4 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvE10numThreadsE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,49,95,95,115,105,110,103,108,101,95,119 +.b8 97,114,112,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,76,106,51,50,69,118,69,49,48,110,117,109,84,104,114,101,97,100 +.b8 115,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,49,55,105,110,116,101,103,114,97,108,95,99,111,110,115,116,97,110,116,73,98,76,98,48,69,69,53,118,97,108,117,101,69 +.b8 0 +.b32 2748 +.b8 4 +.b8 196 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details9templates17integral_constantIbLb0EE5valueE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,49,55,105,110,116,101,103,114,97,108,95,99,111,110,115,116,97,110,116,73,98,76,98,48,69,69,53,118,97,108,117,101,69 +.b8 0 +.b8 7 +.b32 2753 +.b8 8 +.b8 98,111,111,108 +.b8 0 +.b8 2 +.b8 1 +.b8 6 +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,49,55,105,110,116,101,103,114,97,108,95,99,111,110,115,116,97,110,116,73,98,76,98,49,69,69,53,118,97,108,117,101,69 +.b8 0 +.b32 2748 +.b8 4 +.b8 196 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details9templates17integral_constantIbLb1EE5valueE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,49,55,105,110,116,101,103,114,97,108,95,99,111,110,115,116,97,110,116,73,98,76,98,49,69,69,53,118,97,108,117,101,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,69,76,106,52,50,57,52,57,54,55,50,57,53,69,76,106,51,49,69,76 +.b8 106,53,69,69,57,116,105,108,101,67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 157 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj1ELj4294967295ELj31ELj5EE9tileCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,69,76,106,52,50,57,52,57,54,55,50,57,53,69,76,106,51,49,69,76 +.b8 106,53,69,69,57,116,105,108,101,67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,69,76,106,52,50,57,52,57,54,55,50,57,53,69,76,106,51,49,69,76 +.b8 106,53,69,69,56,116,105,108,101,77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 158 +.b8 5 +.b8 9 
+.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj1ELj4294967295ELj31ELj5EE8tileMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,69,76,106,52,50,57,52,57,54,55,50,57,53,69,76,106,51,49,69,76 +.b8 106,53,69,69,56,116,105,108,101,77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,69,76,106,52,50,57,52,57,54,55,50,57,53,69,76,106,51,49,69,76 +.b8 106,53,69,69,56,108,97,110,101,77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 159 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj1ELj4294967295ELj31ELj5EE8laneMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,69,76,106,52,50,57,52,57,54,55,50,57,53,69,76,106,51,49,69,76 +.b8 106,53,69,69,56,108,97,110,101,77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,69,76,106,52,50,57,52,57,54,55,50,57,53,69,76,106,51,49,69,76 +.b8 106,53,69,69,49,48,115,104,105,102,116,67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 160 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj1ELj4294967295ELj31ELj5EE10shiftCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 
101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,69,76,106,52,50,57,52,57,54,55,50,57,53,69,76,106,51,49,69,76 +.b8 106,53,69,69,49,48,115,104,105,102,116,67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,50,69,76,106,54,53,53,51,53,69,76,106,49,53,69,76,106,52,69,69,57 +.b8 116,105,108,101,67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 157 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj2ELj65535ELj15ELj4EE9tileCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,50,69,76,106,54,53,53,51,53,69,76,106,49,53,69,76,106,52,69,69,57 +.b8 116,105,108,101,67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,50,69,76,106,54,53,53,51,53,69,76,106,49,53,69,76,106,52,69,69,56 +.b8 116,105,108,101,77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 158 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj2ELj65535ELj15ELj4EE8tileMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,50,69,76,106,54,53,53,51,53,69,76,106,49,53,69,76,106,52,69,69,56 +.b8 116,105,108,101,77,97,115,107,69 +.b8 0 +.b8 6 +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,50,69,76,106,54,53,53,51,53,69,76,106,49,53,69,76,106,52,69,69,56 +.b8 108,97,110,101,77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 159 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj2ELj65535ELj15ELj4EE8laneMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,50,69,76,106,54,53,53,51,53,69,76,106,49,53,69,76,106,52,69,69,56 +.b8 108,97,110,101,77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,50,69,76,106,54,53,53,51,53,69,76,106,49,53,69,76,106,52,69,69,49 +.b8 48,115,104,105,102,116,67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 160 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj2ELj65535ELj15ELj4EE10shiftCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,50,69,76,106,54,53,53,51,53,69,76,106,49,53,69,76,106,52,69,69,49 +.b8 48,115,104,105,102,116,67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,52,69,76,106,50,53,53,69,76,106,55,69,76,106,51,69,69,57,116,105,108 +.b8 101,67,111,117,110,116,69 +.b8 0 
+.b32 450 +.b8 5 +.b8 157 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj4ELj255ELj7ELj3EE9tileCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,52,69,76,106,50,53,53,69,76,106,55,69,76,106,51,69,69,57,116,105,108 +.b8 101,67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,52,69,76,106,50,53,53,69,76,106,55,69,76,106,51,69,69,56,116,105,108 +.b8 101,77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 158 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj4ELj255ELj7ELj3EE8tileMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,52,69,76,106,50,53,53,69,76,106,55,69,76,106,51,69,69,56,116,105,108 +.b8 101,77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,52,69,76,106,50,53,53,69,76,106,55,69,76,106,51,69,69,56,108,97,110 +.b8 101,77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 159 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj4ELj255ELj7ELj3EE8laneMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 
101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,52,69,76,106,50,53,53,69,76,106,55,69,76,106,51,69,69,56,108,97,110 +.b8 101,77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,52,69,76,106,50,53,53,69,76,106,55,69,76,106,51,69,69,49,48,115,104 +.b8 105,102,116,67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 160 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj4ELj255ELj7ELj3EE10shiftCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,52,69,76,106,50,53,53,69,76,106,55,69,76,106,51,69,69,49,48,115,104 +.b8 105,102,116,67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,56,69,76,106,49,53,69,76,106,51,69,76,106,50,69,69,57,116,105,108,101 +.b8 67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 157 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj8ELj15ELj3ELj2EE9tileCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,56,69,76,106,49,53,69,76,106,51,69,76,106,50,69,69,57,116,105,108,101 +.b8 67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 
101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,56,69,76,106,49,53,69,76,106,51,69,76,106,50,69,69,56,116,105,108,101 +.b8 77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 158 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj8ELj15ELj3ELj2EE8tileMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,56,69,76,106,49,53,69,76,106,51,69,76,106,50,69,69,56,116,105,108,101 +.b8 77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,56,69,76,106,49,53,69,76,106,51,69,76,106,50,69,69,56,108,97,110,101 +.b8 77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 159 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj8ELj15ELj3ELj2EE8laneMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,56,69,76,106,49,53,69,76,106,51,69,76,106,50,69,69,56,108,97,110,101 +.b8 77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,56,69,76,106,49,53,69,76,106,51,69,76,106,50,69,69,49,48,115,104,105 +.b8 102,116,67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 160 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj8ELj15ELj3ELj2EE10shiftCountE +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,56,69,76,106,49,53,69,76,106,51,69,76,106,50,69,69,49,48,115,104,105 +.b8 102,116,67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,54,69,76,106,51,69,76,106,49,69,76,106,49,69,69,57,116,105,108,101 +.b8 67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 157 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj16ELj3ELj1ELj1EE9tileCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,54,69,76,106,51,69,76,106,49,69,76,106,49,69,69,57,116,105,108,101 +.b8 67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,54,69,76,106,51,69,76,106,49,69,76,106,49,69,69,56,116,105,108,101 +.b8 77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 158 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj16ELj3ELj1ELj1EE8tileMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,54,69,76,106,51,69,76,106,49,69,76,106,49,69,69,56,116,105,108,101 +.b8 77,97,115,107,69 +.b8 0 +.b8 6 +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,54,69,76,106,51,69,76,106,49,69,76,106,49,69,69,56,108,97,110,101 +.b8 77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 159 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj16ELj3ELj1ELj1EE8laneMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,54,69,76,106,51,69,76,106,49,69,76,106,49,69,69,56,108,97,110,101 +.b8 77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,54,69,76,106,51,69,76,106,49,69,76,106,49,69,69,49,48,115,104,105 +.b8 102,116,67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 160 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj16ELj3ELj1ELj1EE10shiftCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,49,54,69,76,106,51,69,76,106,49,69,76,106,49,69,69,49,48,115,104,105 +.b8 102,116,67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,51,50,69,76,106,49,69,76,106,48,69,76,106,48,69,69,57,116,105,108,101 +.b8 67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 157 +.b8 5 +.b8 9 +.b8 3 +.b64 
_ZN18cooperative_groups4__v17details4tile13_tile_helpersILj32ELj1ELj0ELj0EE9tileCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,51,50,69,76,106,49,69,76,106,48,69,76,106,48,69,69,57,116,105,108,101 +.b8 67,111,117,110,116,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,51,50,69,76,106,49,69,76,106,48,69,76,106,48,69,69,56,116,105,108,101 +.b8 77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 158 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj32ELj1ELj0ELj0EE8tileMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,51,50,69,76,106,49,69,76,106,48,69,76,106,48,69,69,56,116,105,108,101 +.b8 77,97,115,107,69 +.b8 0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,51,50,69,76,106,49,69,76,106,48,69,76,106,48,69,69,56,108,97,110,101 +.b8 77,97,115,107,69 +.b8 0 +.b32 450 +.b8 5 +.b8 159 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj32ELj1ELj0ELj0EE8laneMaskE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,51,50,69,76,106,49,69,76,106,48,69,76,106,48,69,69,56,108,97,110,101 +.b8 77,97,115,107,69 +.b8 
0 +.b8 6 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,51,50,69,76,106,49,69,76,106,48,69,76,106,48,69,69,49,48,115,104,105 +.b8 102,116,67,111,117,110,116,69 +.b8 0 +.b32 450 +.b8 5 +.b8 160 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN18cooperative_groups4__v17details4tile13_tile_helpersILj32ELj1ELj0ELj0EE10shiftCountE +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,51,95,116,105,108,101,95,104,101,108,112,101,114,115,73,76,106,51,50,69,76,106,49,69,76,106,48,69,76,106,48,69,69,49,48,115,104,105 +.b8 102,116,67,111,117,110,116,69 +.b8 0 +.b8 2 +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108,101,50,48,95,77,101,109,111,114,121,83,104,117,102 +.b8 102,108,101,67,117,116,111,102,102,69 +.b8 0 +.b32 7940 +.b8 5 +.b8 98 +.b8 1 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN64_INTERNAL_00000000_25_GB_jit_AxB_dot3_phase3_mp_f71137a9_402139318cooperative_groups4__v17details4tile20_MemoryShuffleCutoffE +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108,101,50,48,95,77,101,109,111,114,121,83,104,117,102 +.b8 102,108,101,67,117,116,111,102,102,69 +.b8 0 +.b8 7 +.b32 7945 +.b8 8 +.b8 
117,110,115,105,103,110,101,100,32,108,111,110,103,32,108,111,110,103 +.b8 0 +.b8 7 +.b8 8 +.b8 6 +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,56,99,111,97,108,101,115,99,101,100,95,103,114,111,117,112,95 +.b8 105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 69 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN64_INTERNAL_00000000_25_GB_jit_AxB_dot3_phase3_mp_f71137a9_402139318cooperative_groups4__v17details18coalesced_group_idE +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,56,99,111,97,108,101,115,99,101,100,95,103,114,111,117,112,95 +.b8 105,100,69 +.b8 0 +.b8 6 +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,57,109,117,108,116,105,95,103,114,105,100,95,103,114,111,117,112 +.b8 95,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 70 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN64_INTERNAL_00000000_25_GB_jit_AxB_dot3_phase3_mp_f71137a9_402139318cooperative_groups4__v17details19multi_grid_group_idE +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 
116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,57,109,117,108,116,105,95,103,114,105,100,95,103,114,111,117,112 +.b8 95,105,100,69 +.b8 0 +.b8 6 +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,51,103,114,105,100,95,103,114,111,117,112,95,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 71 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN64_INTERNAL_00000000_25_GB_jit_AxB_dot3_phase3_mp_f71137a9_402139318cooperative_groups4__v17details13grid_group_idE +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,51,103,114,105,100,95,103,114,111,117,112,95,105,100,69 +.b8 0 +.b8 6 +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,53,116,104,114,101,97,100,95,98,108,111,99,107,95,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 72 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN64_INTERNAL_00000000_25_GB_jit_AxB_dot3_phase3_mp_f71137a9_402139318cooperative_groups4__v17details15thread_block_idE +.b8 
95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,53,116,104,114,101,97,100,95,98,108,111,99,107,95,105,100,69 +.b8 0 +.b8 6 +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,57,109,117,108,116,105,95,116,105,108,101,95,103,114,111,117,112 +.b8 95,105,100,69 +.b8 0 +.b32 450 +.b8 3 +.b8 73 +.b8 5 +.b8 9 +.b8 3 +.b64 _ZN64_INTERNAL_00000000_25_GB_jit_AxB_dot3_phase3_mp_f71137a9_402139318cooperative_groups4__v17details19multi_tile_group_idE +.b8 95,90,78,54,52,95,73,78,84,69,82,78,65,76,95,48,48,48,48,48,48,48,48,95,50,53,95,71,66,95,106,105,116,95,65,120,66,95,100,111 +.b8 116,51,95,112,104,97,115,101,51,95,109,112,95,102,55,49,49,51,55,97,57,95,52,48,50,49,51,57,51,49,56,99,111,111,112,101,114,97,116,105 +.b8 118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,57,109,117,108,116,105,95,116,105,108,101,95,103,114,111,117,112 +.b8 95,105,100,69 +.b8 0 +.b8 3 +.b32 9325 +.b8 95,90,78,49,56,95,95,106,105,116,105,102,121,95,115,116,100,105,110,116,95,110,115,55,105,110,116,54,52,95,116,69 +.b8 0 +.b8 6 +.b8 8 +.b8 8 +.b8 108,111,110,103,32,108,111,110,103 +.b8 0 +.b8 5 +.b8 8 +.b8 3 +.b32 9352 +.b8 115,105,122,101,95,116 +.b8 0 +.b8 7 +.b8 55 +.b8 8 +.b8 117,110,115,105,103,110,101,100,32,108,111,110,103 +.b8 0 +.b8 7 +.b8 8 +.b8 9 +.b8 71,66,95,77,97,116,114,105,120,95,111,112,97,113,117,101 +.b8 0 +.b8 208 +.b8 1 +.b8 247 +.b8 1 +.b8 
10 +.b8 109,97,103,105,99 +.b8 0 +.b32 9285 +.b8 8 +.b8 33 +.b8 2 +.b8 35 +.b8 0 +.b8 10 +.b8 104,101,97,100,101,114,95,115,105,122,101 +.b8 0 +.b32 9338 +.b8 8 +.b8 34 +.b8 2 +.b8 35 +.b8 8 +.b8 10 +.b8 108,111,103,103,101,114 +.b8 0 +.b32 10089 +.b8 8 +.b8 35 +.b8 2 +.b8 35 +.b8 16 +.b8 10 +.b8 108,111,103,103,101,114,95,115,105,122,101 +.b8 0 +.b32 9338 +.b8 8 +.b8 36 +.b8 2 +.b8 35 +.b8 24 +.b8 10 +.b8 116,121,112,101 +.b8 0 +.b32 10106 +.b8 8 +.b8 40 +.b8 2 +.b8 35 +.b8 32 +.b8 5 +.b8 71,66,95,84,121,112,101,95,111,112,97,113,117,101 +.b8 0 +.b8 0 +.b8 1 +.b8 129 +.b8 1 +.b8 10 +.b8 112,108,101,110 +.b8 0 +.b32 9285 +.b8 8 +.b8 205 +.b8 2 +.b8 35 +.b8 40 +.b8 10 +.b8 118,108,101,110 +.b8 0 +.b32 9285 +.b8 8 +.b8 206 +.b8 2 +.b8 35 +.b8 48 +.b8 10 +.b8 118,100,105,109 +.b8 0 +.b32 9285 +.b8 8 +.b8 207 +.b8 2 +.b8 35 +.b8 56 +.b8 10 +.b8 110,118,101,99 +.b8 0 +.b32 9285 +.b8 8 +.b8 208 +.b8 2 +.b8 35 +.b8 64 +.b8 10 +.b8 110,118,101,99,95,110,111,110,101,109,112,116,121 +.b8 0 +.b32 9285 +.b8 8 +.b8 211 +.b8 2 +.b8 35 +.b8 72 +.b8 10 +.b8 104 +.b8 0 +.b32 10131 +.b8 8 +.b8 214 +.b8 2 +.b8 35 +.b8 80 +.b8 10 +.b8 112 +.b8 0 +.b32 10131 +.b8 8 +.b8 215 +.b8 2 +.b8 35 +.b8 88 +.b8 10 +.b8 105 +.b8 0 +.b32 10131 +.b8 8 +.b8 216 +.b8 2 +.b8 35 +.b8 96 +.b8 10 +.b8 120 +.b8 0 +.b32 10140 +.b8 8 +.b8 217 +.b8 2 +.b8 35 +.b8 104 +.b8 10 +.b8 98 +.b8 0 +.b32 10155 +.b8 8 +.b8 219 +.b8 2 +.b8 35 +.b8 112 +.b8 10 +.b8 110,118,97,108,115 +.b8 0 +.b32 9285 +.b8 8 +.b8 220 +.b8 2 +.b8 35 +.b8 120 +.b8 10 +.b8 112,95,115,105,122,101 +.b8 0 +.b32 9338 +.b8 8 +.b8 222 +.b8 3 +.b8 35 +.b8 128,1 +.b8 10 +.b8 104,95,115,105,122,101 +.b8 0 +.b32 9338 +.b8 8 +.b8 223 +.b8 3 +.b8 35 +.b8 136,1 +.b8 10 +.b8 98,95,115,105,122,101 +.b8 0 +.b32 9338 +.b8 8 +.b8 224 +.b8 3 +.b8 35 +.b8 144,1 +.b8 10 +.b8 105,95,115,105,122,101 +.b8 0 +.b32 9338 +.b8 8 +.b8 225 +.b8 3 +.b8 35 +.b8 152,1 +.b8 10 +.b8 120,95,115,105,122,101 +.b8 0 +.b32 9338 +.b8 8 +.b8 226 +.b8 3 +.b8 35 +.b8 160,1 +.b8 11 
+.b8 80,101,110,100,105,110,103 +.b8 0 +.b32 10218 +.b8 8 +.b8 21 +.b8 1 +.b8 3 +.b8 35 +.b8 168,1 +.b8 5 +.b8 71,66,95,80,101,110,100,105,110,103,95,115,116,114,117,99,116 +.b8 0 +.b8 0 +.b8 1 +.b8 214 +.b8 1 +.b8 11 +.b8 110,122,111,109,98,105,101,115 +.b8 0 +.b32 10246 +.b8 8 +.b8 51 +.b8 1 +.b8 3 +.b8 35 +.b8 176,1 +.b8 11 +.b8 104,121,112,101,114,95,115,119,105,116,99,104 +.b8 0 +.b32 10287 +.b8 8 +.b8 116 +.b8 1 +.b8 3 +.b8 35 +.b8 184,1 +.b8 11 +.b8 98,105,116,109,97,112,95,115,119,105,116,99,104 +.b8 0 +.b32 10287 +.b8 8 +.b8 117 +.b8 1 +.b8 3 +.b8 35 +.b8 188,1 +.b8 11 +.b8 115,112,97,114,115,105,116,121,95,99,111,110,116,114,111,108 +.b8 0 +.b32 10296 +.b8 8 +.b8 118 +.b8 1 +.b8 3 +.b8 35 +.b8 192,1 +.b8 11 +.b8 112,95,115,104,97,108,108,111,119 +.b8 0 +.b32 2753 +.b8 8 +.b8 135 +.b8 1 +.b8 3 +.b8 35 +.b8 196,1 +.b8 11 +.b8 104,95,115,104,97,108,108,111,119 +.b8 0 +.b32 2753 +.b8 8 +.b8 136 +.b8 1 +.b8 3 +.b8 35 +.b8 197,1 +.b8 11 +.b8 98,95,115,104,97,108,108,111,119 +.b8 0 +.b32 2753 +.b8 8 +.b8 137 +.b8 1 +.b8 3 +.b8 35 +.b8 198,1 +.b8 11 +.b8 105,95,115,104,97,108,108,111,119 +.b8 0 +.b32 2753 +.b8 8 +.b8 138 +.b8 1 +.b8 3 +.b8 35 +.b8 199,1 +.b8 11 +.b8 120,95,115,104,97,108,108,111,119 +.b8 0 +.b32 2753 +.b8 8 +.b8 139 +.b8 1 +.b8 3 +.b8 35 +.b8 200,1 +.b8 11 +.b8 115,116,97,116,105,99,95,104,101,97,100,101,114 +.b8 0 +.b32 2753 +.b8 8 +.b8 140 +.b8 1 +.b8 3 +.b8 35 +.b8 201,1 +.b8 11 +.b8 105,115,95,99,115,99 +.b8 0 +.b32 2753 +.b8 8 +.b8 146 +.b8 1 +.b8 3 +.b8 35 +.b8 202,1 +.b8 11 +.b8 106,117,109,98,108,101,100 +.b8 0 +.b32 2753 +.b8 8 +.b8 147 +.b8 1 +.b8 3 +.b8 35 +.b8 203,1 +.b8 11 +.b8 105,115,111 +.b8 0 +.b32 2753 +.b8 8 +.b8 172 +.b8 1 +.b8 3 +.b8 35 +.b8 204,1 +.b8 0 +.b8 4 +.b32 10098 +.b32 12 +.b8 8 +.b8 99,104,97,114 +.b8 0 +.b8 6 +.b8 1 +.b8 3 +.b32 10122 +.b8 71,114,66,95,84,121,112,101 +.b8 0 +.b8 2 +.b8 67 +.b8 4 +.b32 9483 +.b32 12 +.b8 4 +.b32 9285 +.b32 12 +.b8 4 +.b32 10149 +.b32 12 +.b8 12 +.b8 118,111,105,100 +.b8 0 +.b8 4 
+.b32 10164 +.b32 12 +.b8 3 +.b32 10203 +.b8 95,90,78,49,56,95,95,106,105,116,105,102,121,95,115,116,100,105,110,116,95,110,115,54,105,110,116,56,95,116,69 +.b8 0 +.b8 6 +.b8 5 +.b8 8 +.b8 115,105,103,110,101,100,32,99,104,97,114 +.b8 0 +.b8 6 +.b8 1 +.b8 13 +.b32 10237 +.b8 71,66,95,80,101,110,100,105,110,103 +.b8 0 +.b8 1 +.b8 231 +.b8 1 +.b8 4 +.b32 9773 +.b32 12 +.b8 3 +.b32 7945 +.b8 95,90,78,49,56,95,95,106,105,116,105,102,121,95,115,116,100,105,110,116,95,110,115,56,117,105,110,116,54,52,95,116,69 +.b8 0 +.b8 6 +.b8 22 +.b8 8 +.b8 102,108,111,97,116 +.b8 0 +.b8 4 +.b8 4 +.b8 8 +.b8 105,110,116 +.b8 0 +.b8 5 +.b8 4 +.b8 3 +.b32 10321 +.b8 71,114,66,95,77,97,116,114,105,120 +.b8 0 +.b8 2 +.b8 76 +.b8 4 +.b32 9369 +.b32 12 +.b8 3 +.b32 10296 +.b8 95,90,78,49,56,95,95,106,105,116,105,102,121,95,115,116,100,105,110,116,95,110,115,55,105,110,116,51,50,95,116,69 +.b8 0 +.b8 6 +.b8 7 +.b8 4 +.b32 14534 +.b32 12 +.b8 9 +.b8 100,105,109,51 +.b8 0 +.b8 12 +.b8 7 +.b8 205 +.b8 3 +.b8 11 +.b8 120 +.b8 0 +.b32 455 +.b8 7 +.b8 207 +.b8 3 +.b8 2 +.b8 35 +.b8 0 +.b8 11 +.b8 121 +.b8 0 +.b32 455 +.b8 7 +.b8 207 +.b8 3 +.b8 2 +.b8 35 +.b8 4 +.b8 11 +.b8 122 +.b8 0 +.b32 455 +.b8 7 +.b8 207 +.b8 3 +.b8 2 +.b8 35 +.b8 8 +.b8 0 +.b8 9 +.b8 117,105,110,116,51 +.b8 0 +.b8 12 +.b8 7 +.b8 32 +.b8 3 +.b8 11 +.b8 120 +.b8 0 +.b32 455 +.b8 7 +.b8 34 +.b8 3 +.b8 2 +.b8 35 +.b8 0 +.b8 11 +.b8 121 +.b8 0 +.b32 455 +.b8 7 +.b8 34 +.b8 3 +.b8 2 +.b8 35 +.b8 4 +.b8 11 +.b8 122 +.b8 0 +.b32 455 +.b8 7 +.b8 34 +.b8 3 +.b8 2 +.b8 35 +.b8 8 +.b8 0 +.b8 13 +.b32 10429 +.b8 117,105,110,116,51 +.b8 0 +.b8 7 +.b8 178 +.b8 3 +.b8 14 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,95,98,97,115,101,73,76,106,52,69,69,69 +.b8 0 +.b8 16 +.b8 3 +.b8 162 +.b8 15 +.b8 
95,95,98,95,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,103 +.b8 114,111,117,112,69 +.b8 0 +.b32 13983 +.b8 2 +.b8 35 +.b8 0 +.b8 0 +.b8 9 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,69 +.b8 0 +.b8 16 +.b8 3 +.b8 61 +.b8 2 +.b8 15 +.b8 95,95,98,95,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103 +.b8 114,111,117,112,95,98,97,115,101,73,76,106,52,69,69,69 +.b8 0 +.b32 10494 +.b8 2 +.b8 35 +.b8 0 +.b8 0 +.b8 5 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,69 +.b8 0 +.b8 1 +.b8 3 +.b8 229 +.b8 3 +.b8 3 +.b32 10330 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,49,54,114,101,109,111,118,101,95,114,101,102,101,114,101,110,99,101,73,82,105,69,52,116,121,112,101,69 +.b8 0 +.b8 4 +.b8 213 +.b8 3 +.b32 10803 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,49,50,114,101,109,111,118,101,95,99,111,110,115,116,73,105,69,52,116,121,112,101,69 +.b8 0 +.b8 4 +.b8 219 +.b8 3 +.b32 10885 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,49,53,114,101,109,111,118,101,95,118,111,108,97,116,105,108,101,73,105,69,52,116,121,112,101,69 +.b8 0 +.b8 4 +.b8 222 +.b8 3 +.b32 10962 +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,57,114,101,109,111,118,101,95,99,118,73,105,69,52,116,121,112,101,69 +.b8 0 +.b8 4 +.b8 225 +.b8 5 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,95,116,105,108,101,73,76,106,51,50,69,78,83,48,95,49,50,116,104,114,101,97,100,95,98,108,111,99,107,69,69,69 +.b8 0 +.b8 1 +.b8 3 +.b8 66 +.b8 6 +.b8 5 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,49,95,95,115,105,110,103,108,101,95,119 +.b8 97,114,112,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,76,106,51,50,69,78,83,48,95,49,50,116,104,114,101,97,100,95 +.b8 98,108,111,99,107,69,69,69 +.b8 0 +.b8 1 +.b8 3 +.b8 114 +.b8 4 +.b8 5 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,50,50,116,104 +.b8 114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,105,109,112,108,73,76,106,51,50,69,78,83,48,95,49,50,116,104,114,101,97,100,95,98 +.b8 108,111,99,107,69,76,98,48,69,69,69 +.b8 0 +.b8 1 +.b8 3 +.b8 31 +.b8 6 +.b8 5 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,50,48,116,105 +.b8 108,101,100,95,112,97,114,116,105,116,105,111,110,95,105,109,112,108,73,76,106,51,50,69,78,83,48,95,49,50,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,69,69,69 +.b8 0 +.b8 1 +.b8 3 +.b8 107 +.b8 6 +.b8 3 +.b32 10803 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,49,56,114,101,109,111,118,101,95,114,101,102,101,114,101,110,99,101,95,116,73,82,105,69,69 +.b8 0 +.b8 4 +.b8 217 
+.b8 13 +.b32 10379 +.b8 100,105,109,51 +.b8 0 +.b8 7 +.b8 220 +.b8 3 +.b8 16 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,67,49,69,118 +.b8 0 +.b8 116,104,114,101,97,100,95,98,108,111,99,107 +.b8 0 +.b8 3 +.b8 93 +.b8 2 +.b32 10149 +.b8 1 +.b8 17 +.b8 116,104,105,115 +.b8 0 +.b32 11649 +.b8 0 +.b8 7 +.b32 11654 +.b8 4 +.b32 10608 +.b32 12 +.b8 18 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,105,115,95,116,104,114,101,97 +.b8 100,95,98,108,111,99,107,69,118 +.b8 0 +.b8 116,104,105,115,95,116,104,114,101,97,100,95,98,108,111,99,107 +.b8 0 +.b8 3 +.b8 168 +.b8 2 +.b32 10608 +.b8 1 +.b8 16 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,53,116,105,108,101,100,95,112,97,114,116 +.b8 105,116,105,111,110,73,76,106,51,50,69,78,83,48,95,49,50,116,104,114,101,97,100,95,98,108,111,99,107,69,69,69,78,83,48,95,49,55,116,104 +.b8 114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,88,84,95,69,84,48,95,69,69,82,75,83,52,95 +.b8 0 +.b8 116,105,108,101,100,95,112,97,114,116,105,116,105,111,110,60,51,50,85,44,116,104,114,101,97,100,95,98,108,111,99,107,62 +.b8 0 +.b8 3 +.b8 142 +.b8 6 +.b32 11115 +.b8 1 +.b8 19 +.b8 103 +.b8 0 +.b8 3 +.b8 142 +.b8 6 +.b32 11905 +.b8 0 +.b8 20 +.b32 11910 +.b8 7 +.b32 10608 +.b8 16 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,50,48,116,105 +.b8 108,101,100,95,112,97,114,116,105,116,105,111,110,95,105,109,112,108,73,76,106,51,50,69,78,83,48,95,49,50,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,69,69,67,49,69,82,75,83,51,95 +.b8 0 +.b8 116,105,108,101,100,95,112,97,114,116,105,116,105,111,110,95,105,109,112,108 +.b8 0 +.b8 3 +.b8 111 +.b8 6 +.b32 10149 +.b8 1 +.b8 17 +.b8 116,104,105,115 +.b8 0 +.b32 12059 
+.b8 19 +.b8 103 +.b8 0 +.b8 3 +.b8 111 +.b8 6 +.b32 11905 +.b8 0 +.b8 7 +.b32 12064 +.b8 4 +.b32 11386 +.b32 12 +.b8 16 +.b8 95,90,78,75,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,98,108 +.b8 111,99,107,95,116,105,108,101,73,76,106,51,50,69,78,83,48,95,49,50,116,104,114,101,97,100,95,98,108,111,99,107,69,69,99,118,78,83,49,95 +.b8 73,76,106,51,50,69,118,69,69,69,118 +.b8 0 +.b8 111,112,101,114,97,116,111,114,32,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,58,58,95,95,118,49,58,58,116,104,114,101,97 +.b8 100,95,98,108,111,99,107,95,116,105,108,101,60,51,50,85,44,32,118,111,105,100,62 +.b8 0 +.b8 3 +.b8 75 +.b8 6 +.b32 13423 +.b8 1 +.b8 17 +.b8 116,104,105,115 +.b8 0 +.b32 12249 +.b8 0 +.b8 7 +.b32 12254 +.b8 4 +.b32 12263 +.b32 12 +.b8 7 +.b32 11115 +.b8 16 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,95,116,105,108,101,73,76,106,51,50,69,118,69,67,49,73,78,83,48,95,49,50,116,104,114,101,97,100,95,98,108,111,99,107,69,69,69,82 +.b8 75,78,83,49,95,73,76,106,51,50,69,84,95,69,69 +.b8 0 +.b8 116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,60,116,104,114,101,97,100,95,98,108,111,99,107,62 +.b8 0 +.b8 3 +.b8 93 +.b8 6 +.b32 10149 +.b8 1 +.b8 17 +.b8 116,104,105,115 +.b8 0 +.b32 12426 +.b8 19 +.b8 103 +.b8 0 +.b8 3 +.b8 93 +.b8 6 +.b32 12440 +.b8 0 +.b8 7 +.b32 12431 +.b8 4 +.b32 13423 +.b32 12 +.b8 20 +.b32 12263 +.b8 16 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95,115 +.b8 105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,49,48,98,117,105,108,100 +.b8 95,109,97,115,107,69,118 +.b8 0 +.b8 98,117,105,108,100,95,109,97,115,107 +.b8 0 +.b8 3 +.b8 243 +.b8 3 +.b32 455 +.b8 1 +.b8 21 
+.b8 22 +.b8 109,97,115,107 +.b8 0 +.b8 3 +.b8 244 +.b8 3 +.b32 455 +.b8 21 +.b8 22 +.b8 108,97,110,101,73,100 +.b8 0 +.b8 3 +.b8 247 +.b8 3 +.b32 455 +.b8 0 +.b8 0 +.b8 0 +.b8 16 +.b8 95,90,78,75,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95 +.b8 115,105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,51,97,110,121,69,105 +.b8 0 +.b8 97,110,121 +.b8 0 +.b8 3 +.b8 56 +.b8 4 +.b32 10296 +.b8 1 +.b8 17 +.b8 116,104,105,115 +.b8 0 +.b32 12731 +.b8 19 +.b8 112,114,101,100,105,99,97,116,101 +.b8 0 +.b8 3 +.b8 56 +.b8 4 +.b32 10296 +.b8 21 +.b8 22 +.b8 108,97,110,101,95,98,97,108,108,111,116 +.b8 0 +.b8 3 +.b8 57 +.b8 4 +.b32 455 +.b8 0 +.b8 0 +.b8 7 +.b32 12736 +.b8 4 +.b32 12745 +.b32 12 +.b8 7 +.b32 10723 +.b8 23 +.b64 $L__func_begin0 +.b64 $L__func_end0 +.b8 1 +.b8 156 +.b8 95,90,49,56,65,120,66,95,100,111,116,51,95,112,104,97,115,101,51,95,109,112,73,105,105,105,69,118,120,120,80,120,80,49,54,71,66,95,77,97 +.b8 116,114,105,120,95,111,112,97,113,117,101,83,50,95,83,50,95,83,50,95,105 +.b8 0 +.b8 65,120,66,95,100,111,116,51,95,112,104,97,115,101,51,95,109,112,60,105,110,116,51,50,95,116,44,105,110,116,51,50,95,116,44,105,110,116,51,50 +.b8 95,116,62 +.b8 0 +.b8 10 +.b8 76 +.b32 10149 +.b8 1 +.b8 24 +.b8 9 +.b8 3 +.b64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_0 +.b8 7 +.b8 115,116,97,114,116 +.b8 0 +.b8 10 +.b8 78 +.b32 9285 +.b8 24 +.b8 9 +.b8 3 +.b64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_1 +.b8 7 +.b8 101,110,100 +.b8 0 +.b8 10 +.b8 79 +.b32 9285 +.b8 24 +.b8 7 +.b8 144 +.b8 184 +.b8 230 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b8 2 +.b8 66,117,99,107,101,116 +.b8 0 +.b8 10 +.b8 80 +.b32 10131 +.b8 24 +.b8 9 +.b8 3 +.b64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_3 +.b8 7 +.b8 67 +.b8 0 +.b8 10 +.b8 81 +.b32 10303 +.b8 24 +.b8 9 +.b8 3 +.b64 
_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_4 +.b8 7 +.b8 77 +.b8 0 +.b8 10 +.b8 82 +.b32 10303 +.b8 24 +.b8 9 +.b8 3 +.b64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_5 +.b8 7 +.b8 65 +.b8 0 +.b8 10 +.b8 83 +.b32 10303 +.b8 24 +.b8 9 +.b8 3 +.b64 _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i_param_6 +.b8 7 +.b8 66 +.b8 0 +.b8 10 +.b8 84 +.b32 10303 +.b8 24 +.b8 6 +.b8 144 +.b8 184 +.b8 236 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 115,122 +.b8 0 +.b8 10 +.b8 85 +.b32 10296 +.b8 25 +.b64 $L__tmp0 +.b64 $L__tmp178 +.b8 26 +.b8 6 +.b8 12 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 128,1 +.b8 116,105,108,101 +.b8 0 +.b8 10 +.b8 118 +.b32 13423 +.b8 27 +.b8 6 +.b8 144 +.b8 177 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 65,120 +.b8 0 +.b8 10 +.b8 90 +.b32 16686 +.b8 27 +.b8 6 +.b8 144 +.b8 178 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 66,120 +.b8 0 +.b8 10 +.b8 91 +.b32 16686 +.b8 27 +.b8 6 +.b8 144 +.b8 179 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 67,120 +.b8 0 +.b8 10 +.b8 92 +.b32 16686 +.b8 27 +.b8 6 +.b8 144 +.b8 180 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 67,105 +.b8 0 +.b8 10 +.b8 93 +.b32 10131 +.b8 27 +.b8 6 +.b8 144 +.b8 181 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 77,105 +.b8 0 +.b8 10 +.b8 94 +.b32 10131 +.b8 27 +.b8 6 +.b8 144 +.b8 182 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 65,105 +.b8 0 +.b8 10 +.b8 95 +.b32 10131 +.b8 27 +.b8 6 +.b8 144 +.b8 183 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 66,105 +.b8 0 +.b8 10 +.b8 96 +.b32 10131 +.b8 27 +.b8 6 +.b8 144 +.b8 184 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 65,112 +.b8 0 +.b8 10 +.b8 97 +.b32 10131 +.b8 27 +.b8 6 +.b8 144 +.b8 185 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 66,112 +.b8 0 +.b8 10 +.b8 98 +.b32 10131 +.b8 28 +.b32 .debug_loc +.b8 122,99 +.b8 0 +.b8 10 +.b8 102 +.b32 10296 +.b8 27 +.b8 6 +.b8 144 +.b8 181 +.b8 238 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 116,105,100,95,103,108,111,98,97,108 +.b8 0 +.b8 10 
+.b8 108 +.b32 10296 +.b8 27 +.b8 5 +.b8 144 +.b8 178 +.b8 228 +.b8 149 +.b8 1 +.b8 2 +.b8 116,105,100 +.b8 0 +.b8 10 +.b8 109 +.b32 10296 +.b8 27 +.b8 6 +.b8 144 +.b8 184 +.b8 238 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 98 +.b8 0 +.b8 10 +.b8 111 +.b32 10296 +.b8 28 +.b32 .debug_loc+284 +.b8 110,110,122,65 +.b8 0 +.b8 10 +.b8 114 +.b32 9285 +.b8 28 +.b32 .debug_loc+402 +.b8 110,110,122,66 +.b8 0 +.b8 10 +.b8 115 +.b32 9285 +.b8 28 +.b32 .debug_loc+468 +.b8 110,95,105,110,116,101,114,115,101,99,116 +.b8 0 +.b8 10 +.b8 116 +.b32 9285 +.b8 27 +.b8 5 +.b8 144 +.b8 179 +.b8 228 +.b8 149 +.b8 1 +.b8 2 +.b8 112,97,114,116,115 +.b8 0 +.b8 10 +.b8 120 +.b32 10296 +.b8 28 +.b32 .debug_loc+535 +.b8 112,97,105,114,95,105,100 +.b8 0 +.b8 10 +.b8 105 +.b32 9285 +.b8 9 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,98,108,111 +.b8 99,107,95,116,105,108,101,73,76,106,51,50,69,118,69,69 +.b8 0 +.b8 16 +.b8 3 +.b8 66 +.b8 6 +.b8 15 +.b8 95,95,98,95,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,50,50 +.b8 116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,105,109,112,108,73,76,106,51,50,69,118,76,98,48,69,69,69 +.b8 0 +.b32 13569 +.b8 2 +.b8 35 +.b8 0 +.b8 9 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,50,50,116,104 +.b8 114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,105,109,112,108,73,76,106,51,50,69,118,76,98,48,69,69,69 +.b8 0 +.b8 16 +.b8 3 +.b8 31 +.b8 6 +.b8 15 +.b8 95,95,98,95,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,49,95,95,115,105,110,103,108,101 +.b8 95,119,97,114,112,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,76,106,51,50,69,118,69,69 +.b8 0 +.b32 13729 +.b8 2 +.b8 35 +.b8 0 +.b8 9 +.b8 
95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,49,95,95,115,105,110,103,108,101,95,119 +.b8 97,114,112,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,76,106,51,50,69,118,69,69 +.b8 0 +.b8 16 +.b8 3 +.b8 114 +.b8 4 +.b8 15 +.b8 95,95,98,95,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103 +.b8 114,111,117,112,95,98,97,115,101,73,76,106,49,69,69,69 +.b8 0 +.b32 13870 +.b8 2 +.b8 35 +.b8 0 +.b8 14 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,95,98,97,115,101,73,76,106,49,69,69,69 +.b8 0 +.b8 16 +.b8 3 +.b8 162 +.b8 15 +.b8 95,95,98,95,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,103 +.b8 114,111,117,112,69 +.b8 0 +.b32 13983 +.b8 2 +.b8 35 +.b8 0 +.b8 14 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,69 +.b8 0 +.b8 16 +.b8 3 +.b8 87 +.b8 10 +.b8 95,100,97,116,97 +.b8 0 +.b32 14047 +.b8 3 +.b8 131 +.b8 2 +.b8 35 +.b8 0 +.b8 29 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,85,116,95,69 +.b8 0 +.b8 16 +.b8 3 +.b8 124 +.b8 10 +.b8 103,114,111,117,112 +.b8 0 +.b32 14114 +.b8 3 +.b8 125 +.b8 2 +.b8 35 +.b8 0 +.b8 14 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,49,48,103,114,111,117,112,95,100,97,116,97,69 +.b8 0 +.b8 4 +.b8 3 +.b8 90 +.b8 30 +.b8 95,117,110,117,115,101,100 +.b8 0 +.b32 455 +.b8 3 +.b8 91 +.b8 4 +.b8 1 +.b8 31 +.b8 2 +.b8 35 +.b8 0 +.b8 30 +.b8 116,121,112,101 +.b8 0 
+.b32 455 +.b8 3 +.b8 92 +.b8 4 +.b8 7 +.b8 24 +.b8 2 +.b8 35 +.b8 0 +.b8 31 +.b32 455 +.b8 3 +.b8 92 +.b8 4 +.b8 0 +.b8 32 +.b8 2 +.b8 35 +.b8 4 +.b8 0 +.b8 10 +.b8 99,111,97,108,101,115,99,101,100 +.b8 0 +.b32 14247 +.b8 3 +.b8 126 +.b8 2 +.b8 35 +.b8 0 +.b8 14 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,55,116,103,95,100,97,116,97,69 +.b8 0 +.b8 16 +.b8 3 +.b8 108 +.b8 30 +.b8 105,115,95,116,105,108,101,100 +.b8 0 +.b32 455 +.b8 3 +.b8 109 +.b8 4 +.b8 1 +.b8 31 +.b8 2 +.b8 35 +.b8 0 +.b8 30 +.b8 116,121,112,101 +.b8 0 +.b32 455 +.b8 3 +.b8 110 +.b8 4 +.b8 7 +.b8 24 +.b8 2 +.b8 35 +.b8 0 +.b8 30 +.b8 115,105,122,101 +.b8 0 +.b32 455 +.b8 3 +.b8 111 +.b8 4 +.b8 24 +.b8 0 +.b8 2 +.b8 35 +.b8 0 +.b8 30 +.b8 109,101,116,97,71,114,111,117,112,83,105,122,101 +.b8 0 +.b32 455 +.b8 3 +.b8 113 +.b8 4 +.b8 16 +.b8 16 +.b8 2 +.b8 35 +.b8 4 +.b8 30 +.b8 109,101,116,97,71,114,111,117,112,82,97,110,107 +.b8 0 +.b32 455 +.b8 3 +.b8 114 +.b8 4 +.b8 16 +.b8 0 +.b8 2 +.b8 35 +.b8 4 +.b8 10 +.b8 109,97,115,107 +.b8 0 +.b32 455 +.b8 3 +.b8 116 +.b8 2 +.b8 35 +.b8 8 +.b8 10 +.b8 95,114,101,115 +.b8 0 +.b32 455 +.b8 3 +.b8 118 +.b8 2 +.b8 35 +.b8 12 +.b8 0 +.b8 10 +.b8 103,114,105,100 +.b8 0 +.b32 14461 +.b8 3 +.b8 127 +.b8 2 +.b8 35 +.b8 0 +.b8 14 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,49,50,116,104,114,101,97,100,95,103,114,111 +.b8 117,112,55,103,103,95,100,97,116,97,69 +.b8 0 +.b8 8 +.b8 3 +.b8 95 +.b8 10 +.b8 103,114,105,100,87,115 +.b8 0 +.b32 10370 +.b8 3 +.b8 96 +.b8 2 +.b8 35 +.b8 0 +.b8 32 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,49,52,103,114 +.b8 105,100,95,119,111,114,107,115,112,97,99,101,69 +.b8 0 +.b8 0 +.b8 9 +.b8 86 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 33 +.b32 11663 +.b64 
$L__tmp17 +.b64 $L__tmp19 +.b8 10 +.b8 118 +.b8 34 +.b32 11569 +.b64 $L__tmp17 +.b64 $L__tmp18 +.b8 3 +.b8 170 +.b8 2 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 80 +.b32 11638 +.b8 0 +.b8 0 +.b8 33 +.b32 11740 +.b64 $L__tmp20 +.b64 $L__tmp22 +.b8 10 +.b8 118 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 64 +.b32 11894 +.b8 34 +.b32 11915 +.b64 $L__tmp21 +.b64 $L__tmp22 +.b8 3 +.b8 148 +.b8 6 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 48 +.b32 12038 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 56 +.b32 12048 +.b8 0 +.b8 0 +.b8 33 +.b32 12073 +.b64 $L__tmp23 +.b64 $L__tmp26 +.b8 10 +.b8 118 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 24 +.b32 12238 +.b8 34 +.b32 12268 +.b64 $L__tmp24 +.b64 $L__tmp25 +.b8 3 +.b8 76 +.b8 6 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 8 +.b32 12405 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 16 +.b32 12415 +.b8 0 +.b8 0 +.b8 25 +.b64 $L__tmp32 +.b64 $L__tmp170 +.b8 27 +.b8 7 +.b8 144 +.b8 178 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b8 2 +.b8 105 +.b8 0 +.b8 10 +.b8 130 +.b32 9285 +.b8 27 +.b8 7 +.b8 144 +.b8 185 +.b8 240 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b8 2 +.b8 106 +.b8 0 +.b8 10 +.b8 131 +.b32 9285 +.b8 27 +.b8 7 +.b8 144 +.b8 179 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b8 2 +.b8 120,115,116,97,114,116 +.b8 0 +.b8 10 +.b8 133 +.b32 9285 +.b8 27 +.b8 7 +.b8 144 +.b8 180 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b8 2 +.b8 120,101,110,100 +.b8 0 +.b8 10 +.b8 134 +.b32 9285 +.b8 27 +.b8 7 +.b8 144 +.b8 182 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b8 2 +.b8 121,115,116,97,114,116 +.b8 0 +.b8 10 +.b8 137 +.b32 9285 +.b8 27 +.b8 7 +.b8 144 +.b8 183 +.b8 226 +.b8 144 +.b8 147 +.b8 215 +.b8 4 +.b8 2 +.b8 121,101,110,100 +.b8 0 +.b8 10 +.b8 138 +.b32 9285 +.b8 28 +.b32 .debug_loc+678 +.b8 110,120,121 +.b8 0 +.b8 10 +.b8 151 +.b32 9285 +.b8 27 +.b8 5 +.b8 144 +.b8 181 +.b8 228 +.b8 149 
+.b8 1 +.b8 2 +.b8 119,111,114,107,95,112,101,114,95,116,104,114,101,97,100 +.b8 0 +.b8 10 +.b8 153 +.b32 10296 +.b8 28 +.b32 .debug_loc+771 +.b8 100,105,97,103 +.b8 0 +.b8 10 +.b8 154 +.b32 10296 +.b8 27 +.b8 5 +.b8 144 +.b8 183 +.b8 228 +.b8 149 +.b8 1 +.b8 2 +.b8 100,105,97,103,95,101,110,100 +.b8 0 +.b8 10 +.b8 155 +.b32 10296 +.b8 28 +.b32 .debug_loc+908 +.b8 120,95,109,105,110 +.b8 0 +.b8 10 +.b8 158 +.b32 10296 +.b8 28 +.b32 .debug_loc+1265 +.b8 120,95,109,97,120 +.b8 0 +.b8 10 +.b8 159 +.b32 10296 +.b8 28 +.b32 .debug_loc+1673 +.b8 120,99,111,111,114,100 +.b8 0 +.b8 10 +.b8 171 +.b32 10296 +.b8 28 +.b32 .debug_loc+1737 +.b8 121,99,111,111,114,100 +.b8 0 +.b8 10 +.b8 172 +.b32 10296 +.b8 27 +.b8 6 +.b8 144 +.b8 178 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 116,120,95,115,116,97,114,116 +.b8 0 +.b8 10 +.b8 177 +.b32 10296 +.b8 27 +.b8 6 +.b8 144 +.b8 179 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 116,121,95,115,116,97,114,116 +.b8 0 +.b8 10 +.b8 178 +.b32 10296 +.b8 27 +.b8 6 +.b8 144 +.b8 182 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 116,120,95,101,110,100 +.b8 0 +.b8 10 +.b8 203 +.b32 10296 +.b8 27 +.b8 6 +.b8 144 +.b8 183 +.b8 230 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 116,121,95,101,110,100 +.b8 0 +.b8 10 +.b8 204 +.b32 10296 +.b8 28 +.b32 .debug_loc+1801 +.b8 99,105,106 +.b8 0 +.b8 10 +.b8 208 +.b32 10330 +.b8 28 +.b32 .debug_loc+2184 +.b8 99,105,106,95,101,120,105,115,116,115 +.b8 0 +.b8 10 +.b8 213 +.b32 10296 +.b8 28 +.b32 .debug_loc+2469 +.b8 107 +.b8 0 +.b8 10 +.b8 217 +.b32 10296 +.b8 28 +.b32 .debug_loc+2705 +.b8 108 +.b8 0 +.b8 10 +.b8 218 +.b32 10296 +.b8 27 +.b8 6 +.b8 144 +.b8 182 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 97,107,105 +.b8 0 +.b8 10 +.b8 206 +.b32 10330 +.b8 27 +.b8 6 +.b8 144 +.b8 183 +.b8 232 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 98,107,106 +.b8 0 +.b8 10 +.b8 207 +.b32 10330 +.b8 25 +.b64 $L__tmp54 +.b64 $L__tmp67 +.b8 27 +.b8 6 +.b8 144 +.b8 179 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 112,105,118,111,116 
+.b8 0 +.b8 10 +.b8 163 +.b32 10296 +.b8 0 +.b8 25 +.b64 $L__tmp84 +.b64 $L__tmp97 +.b8 27 +.b8 6 +.b8 144 +.b8 185 +.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 112,105,118,111,116 +.b8 0 +.b8 10 +.b8 187 +.b32 10296 +.b8 0 +.b8 25 +.b64 $L__tmp117 +.b64 $L__tmp122 +.b8 27 +.b8 7 +.b8 144 +.b8 185 +.b8 224 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b8 2 +.b8 116 +.b8 0 +.b8 10 +.b8 232 +.b32 10330 +.b8 0 +.b8 34 +.b32 12586 +.b64 $L__tmp149 +.b64 $L__tmp153 +.b8 10 +.b8 14 +.b8 1 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot0 +.b8 35 +.b8 0 +.b32 12680 +.b8 36 +.b8 6 +.b8 144 +.b8 183 +.b8 242 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b32 12690 +.b8 25 +.b64 $L__tmp149 +.b64 $L__tmp153 +.b8 37 +.b8 7 +.b8 144 +.b8 177 +.b8 224 +.b8 196 +.b8 145 +.b8 215 +.b8 4 +.b8 2 +.b32 12709 +.b8 34 +.b32 12445 +.b64 $L__tmp149 +.b64 $L__tmp151 +.b8 3 +.b8 57 +.b8 4 +.b8 25 +.b64 $L__tmp149 +.b64 $L__tmp151 +.b8 37 +.b8 6 +.b8 144 +.b8 185 +.b8 242 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b32 12554 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 38 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,57,116,101,109 +.b8 112,108,97,116,101,115,55,102,111,114,119,97,114,100,73,82,105,69,69,79,84,95,82,78,83,50,95,49,54,114,101,109,111,118,101,95,114,101,102,101 +.b8 114,101,110,99,101,73,83,53,95,69,52,116,121,112,101,69 +.b8 0 +.b8 102,111,114,119,97,114,100,60,105,110,116,51,50,95,116,32,38,62 +.b8 0 +.b8 4 +.b8 231 +.b32 15755 +.b8 1 +.b8 39 +.b8 116 +.b8 0 +.b8 4 +.b8 231 +.b32 15760 +.b8 0 +.b8 20 +.b32 10330 +.b8 20 +.b32 11477 +.b8 16 +.b8 95,90,78,75,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,51,54,95,95,115,116,97,116,105,99,95 +.b8 115,105,122,101,95,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,95,98,97,115,101,73,76,106,51,50,69,69,57,115,104,102,108,95 +.b8 100,111,119,110,73,82,105,105,69,69,84,48,95,79,84,95,106 +.b8 0 +.b8 
115,104,102,108,95,100,111,119,110,60,105,110,116,51,50,95,116,32,38,44,116,121,112,101,62 +.b8 0 +.b8 3 +.b8 14 +.b8 4 +.b32 11042 +.b8 1 +.b8 17 +.b8 116,104,105,115 +.b8 0 +.b32 12731 +.b8 19 +.b8 101,108,101,109 +.b8 0 +.b8 3 +.b8 14 +.b8 4 +.b32 15755 +.b8 19 +.b8 100,101,108,116,97 +.b8 0 +.b8 3 +.b8 14 +.b8 4 +.b32 455 +.b8 0 +.b8 16 +.b8 95,90,78,49,56,99,111,111,112,101,114,97,116,105,118,101,95,103,114,111,117,112,115,52,95,95,118,49,55,100,101,116,97,105,108,115,52,116,105,108 +.b8 101,49,53,95,110,97,116,105,118,101,95,115,104,117,102,102,108,101,57,115,104,102,108,95,100,111,119,110,73,105,69,69,84,95,83,53,95,106,106,106 +.b8 0 +.b8 115,104,102,108,95,100,111,119,110,60,105,110,116,51,50,95,116,62 +.b8 0 +.b8 5 +.b8 69 +.b8 1 +.b32 10330 +.b8 1 +.b8 19 +.b8 101,108,101,109 +.b8 0 +.b8 5 +.b8 70 +.b8 1 +.b32 10330 +.b8 19 +.b8 103,77,97,115,107 +.b8 0 +.b8 5 +.b8 70 +.b8 1 +.b32 455 +.b8 19 +.b8 100,101,108,116,97 +.b8 0 +.b8 5 +.b8 70 +.b8 1 +.b32 455 +.b8 19 +.b8 116,104,114,101,97,100,115 +.b8 0 +.b8 5 +.b8 70 +.b8 1 +.b32 455 +.b8 0 +.b8 23 +.b64 $L__func_begin1 +.b64 $L__func_end1 +.b8 1 +.b8 156 +.b8 95,90,49,51,71,66,95,114,101,100,117,99,101,95,115,117,109,73,105,76,105,51,50,69,69,84,95,78,49,56,99,111,111,112,101,114,97,116,105,118 +.b8 101,95,103,114,111,117,112,115,52,95,95,118,49,49,55,116,104,114,101,97,100,95,98,108,111,99,107,95,116,105,108,101,73,88,84,48,95,69,118,69 +.b8 69,83,48,95 +.b8 0 +.b8 71,66,95,114,101,100,117,99,101,95,115,117,109,60,105,110,116,51,50,95,116,44,51,50,62 +.b8 0 +.b8 10 +.b8 48 +.b32 10330 +.b8 1 +.b8 40 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot1 +.b8 35 +.b8 8 +.b8 103 +.b8 0 +.b8 10 +.b8 48 +.b32 13423 +.b8 40 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot1 +.b8 35 +.b8 24 +.b8 118,97,108 +.b8 0 +.b8 10 +.b8 48 +.b32 10330 +.b8 25 +.b64 $L__tmp179 +.b64 $L__tmp201 +.b8 28 +.b32 .debug_loc+2941 +.b8 105 +.b8 0 +.b8 10 +.b8 52 +.b32 10296 +.b8 25 +.b64 $L__tmp186 +.b64 $L__tmp198 +.b8 27 +.b8 6 +.b8 144 +.b8 176 
+.b8 228 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b8 110,101,120,116 +.b8 0 +.b8 10 +.b8 54 +.b32 10330 +.b8 33 +.b32 15765 +.b64 $L__tmp186 +.b64 $L__tmp196 +.b8 10 +.b8 54 +.b8 35 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot1 +.b8 35 +.b8 0 +.b32 15898 +.b8 36 +.b8 6 +.b8 144 +.b8 182 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b32 15908 +.b8 36 +.b8 5 +.b8 144 +.b8 184 +.b8 228 +.b8 149 +.b8 1 +.b8 2 +.b32 15921 +.b8 34 +.b32 15621 +.b64 $L__tmp186 +.b64 $L__tmp187 +.b8 3 +.b8 16 +.b8 4 +.b8 36 +.b8 6 +.b8 144 +.b8 183 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b32 15745 +.b8 0 +.b8 34 +.b32 12445 +.b64 $L__tmp188 +.b64 $L__tmp193 +.b8 3 +.b8 16 +.b8 4 +.b8 25 +.b64 $L__tmp188 +.b64 $L__tmp193 +.b8 37 +.b8 6 +.b8 144 +.b8 177 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b32 12554 +.b8 0 +.b8 0 +.b8 34 +.b32 15936 +.b64 $L__tmp194 +.b64 $L__tmp195 +.b8 3 +.b8 15 +.b8 4 +.b8 36 +.b8 6 +.b8 144 +.b8 179 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b32 16045 +.b8 36 +.b8 6 +.b8 144 +.b8 180 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b32 16058 +.b8 36 +.b8 6 +.b8 144 +.b8 181 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b32 16072 +.b8 36 +.b8 6 +.b8 144 +.b8 183 +.b8 226 +.b8 200 +.b8 171 +.b8 2 +.b8 2 +.b32 16086 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 0 +.b8 41 +.b64 $L__func_begin2 +.b64 $L__func_end2 +.b8 1 +.b8 156 +.b8 95,90,78,52,100,105,109,51,67,49,69,53,117,105,110,116,51 +.b8 0 +.b8 100,105,109,51 +.b8 0 +.b8 7 +.b8 211 +.b8 3 +.b32 10149 +.b8 1 +.b8 42 +.b8 6 +.b8 144 +.b8 177 +.b8 200 +.b8 201 +.b8 171 +.b8 2 +.b8 2 +.b8 116,104,105,115 +.b8 0 +.b32 16695 +.b8 43 +.b8 6 +.b8 11 +.b8 3 +.b64 __local_depot2 +.b8 35 +.b8 0 +.b8 118 +.b8 0 +.b8 7 +.b8 211 +.b8 3 +.b32 10480 +.b8 0 +.b8 4 +.b32 10330 +.b32 12 +.b8 7 +.b32 16700 +.b8 4 +.b32 10379 +.b32 12 +.b8 0 + } + .section .debug_macinfo + { +.b8 0 + + } + + +--------------------------------------- +instantiated kernel +--------------------------------------- +--- Linker for void AxB_dot3_phase3_mp(long long, long long, long 
long*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, int) --- +--------------------------------------- +ptxas info : 59 bytes gmem +ptxas info : Function properties for _Z13GB_reduce_sumIiLi32EET_N18cooperative_groups4__v117thread_block_tileIXT0_EvEES0_ +ptxas . 88 bytes stack frame, 52 bytes spill stores, 52 bytes spill loads +ptxas info : Function properties for _ZN18cooperative_groups4__v117thread_group_baseILj4EEC2Ev +ptxas . 16 bytes stack frame, 8 bytes spill stores, 8 bytes spill loads +ptxas info : Function properties for _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32EvLb0EEC2ILj32ENS0_12thread_blockELb0EEERKNS2_IXT_ET0_XT1_EEE +ptxas . 104 bytes stack frame, 24 bytes spill stores, 24 bytes spill loads +ptxas info : Function properties for _ZN18cooperative_groups4__v117thread_block_tileILj32ENS0_12thread_blockEEC2ERKS2_ +ptxas . 24 bytes stack frame, 8 bytes spill stores, 8 bytes spill loads +ptxas info : Compiling entry function '_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i' for 'sm_70' +ptxas info : Function properties for _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i +ptxas . 160 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads +ptxas info : Used 138 registers, 412 bytes cmem[0] +ptxas info : Function properties for _ZN4dim3C1E5uint3 +ptxas . 16 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads +ptxas info : Function properties for _ZN18cooperative_groups4__v112thread_groupC2Ej +ptxas . 8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads +ptxas info : Function properties for _ZN18cooperative_groups4__v117thread_group_baseILj1EEC2Ev +ptxas . 16 bytes stack frame, 8 bytes spill stores, 8 bytes spill loads +ptxas info : Function properties for _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32EvEC2Ejj +ptxas . 
32 bytes stack frame, 24 bytes spill stores, 24 bytes spill loads +ptxas info : Function properties for _ZN18cooperative_groups4__v131__single_warp_thread_block_tileILj32ENS0_12thread_blockEEC2Ev +ptxas . 8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads +ptxas info : Function properties for _ZN18cooperative_groups4__v17details22thread_block_tile_implILj32ENS0_12thread_blockELb0EEC2ERKS3_ +ptxas . 24 bytes stack frame, 8 bytes spill stores, 8 bytes spill loads +ptxas info : Function properties for _Z9atomicAddPyy +ptxas . 8 bytes stack frame, 8 bytes spill stores, 8 bytes spill loads +ptxas info : Function properties for _Z13__ballot_syncji +ptxas . 8 bytes stack frame, 8 bytes spill stores, 8 bytes spill loads +ptxas info : Function properties for _Z16__shfl_down_syncjiji +ptxas . 8 bytes stack frame, 8 bytes spill stores, 8 bytes spill loads +ptxas info : Function properties for __ullAtomicAdd +ptxas . 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads +info : 59 bytes gmem +info : Function properties for '_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i': +info : used 138 registers, 320 stack, 0 bytes smem, 412 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +completed func() +Inside serialize!!!! 
+ compiled serialized prog AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +writing prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +--------------------------------------- +--- Linker for void AxB_dot3_phase3_mp(long long, long long, long long*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, GB_Matrix_opaque*, int) --- +--------------------------------------- +info : 59 bytes gmem +info : Function properties for '_Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i': +info : used 138 registers, 320 stack, 0 bytes smem, 412 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +warp 0 zombie count = 27, nzombies = 0 + Czombie = 27 +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 2222.69ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 27 + + (0,6) zombie + (1,1) 1 + (3,12) zombie + (3,17) zombie + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) 0 + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) zombie + (16,20) 0 + (17,30) zombie + (18,18) zombie + (19,1) zombie + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 5 entries, memory: 864 bytes + + (1,1) 1 + (6,24) 0 + (9,31) 0 + (16,20) 0 + (25,4) 1 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 5 entries, memory: 864 bytes + + (1,1) 1 + (6,24) 0 + (9,31) 0 + (16,20) 0 + (25,4) 1 + + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 5 entries, memory: 896 bytes + + (1,1) 0 + (6,24) 0 + (9,31) 0 + (16,20) 0 + (25,4) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 5 entries, memory: 840 bytes + + (1,1) 1 + (6,24) 1 + (9,31) 1 + (16,20) 1 + (25,4) 1 + work:5 gpus:0 [ OK ] AxB_dot3_tests_PLUS_TIMES_3.tinyxtinyPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t (2249 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_3.smallxsmallPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t +Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 16384 bytes +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +1024 slots to fill +all pairs to bucket 1, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5305700 +inside enumify: 0x7f1ff5305700 
+ + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 1, no filling +done assigning buckets +bucket 1 has 1024 dots to do +LAUNCHING BUCKET CODE: 1 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program 
GB_jit_AxB_dot3_phase3_dndn +found memory-cached prog GB_jit_AxB_dot3_phase3_dndn + got kernel instance AxB_dot3_phase3_dndn_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_dndn_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_dndnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<32,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +tid=0, i,j = 428,26 nnzA= 1024, nnzB=1024 +tid=0, i,j = 397,12 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,21 nnzA= 1024, nnzB=1024 +tid=0, i,j = 904,16 nnzA= 1024, nnzB=1024 +tid=0, i,j = 478,0 nnzA= 1024, nnzB=1024 +tid=0, i,j = 666,7 nnzA= 1024, nnzB=1024 +tid=0, i,j = 569,22 nnzA= 1024, nnzB=1024 +tid=0, i,j = 192,13 nnzA= 1024, nnzB=1024 +tid=0, i,j = 103,18 nnzA= 1024, nnzB=1024 +tid=0, i,j = 886,19 nnzA= 1024, nnzB=1024 +tid=0, i,j = 905,24 nnzA= 1024, nnzB=1024 +tid=0, i,j = 568,15 nnzA= 1024, nnzB=1024 +tid=0, i,j = 996,6 nnzA= 1024, nnzB=1024 +tid=0, i,j = 187,9 nnzA= 1024, nnzB=1024 +tid=0, i,j = 376,2 nnzA= 1024, nnzB=1024 +tid=0, i,j = 975,21 nnzA= 1024, nnzB=1024 +tid=0, i,j = 107,28 nnzA= 1024, nnzB=1024 +tid=0, i,j = 46,11 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,0 nnzA= 1024, nnzB=1024 +tid=0, i,j = 928,17 nnzA= 1024, nnzB=1024 +tid=0, i,j = 310,23 nnzA= 1024, nnzB=1024 +tid=0, i,j = 896,8 nnzA= 1024, nnzB=1024 +tid=0, i,j = 560,5 nnzA= 1024, nnzB=1024 +tid=0, i,j = 821,19 nnzA= 1024, nnzB=1024 +tid=0, i,j = 953,12 nnzA= 1024, nnzB=1024 +tid=0, i,j = 446,10 nnzA= 1024, nnzB=1024 +tid=0, i,j = 421,14 nnzA= 1024, nnzB=1024 +tid=0, i,j = 241,25 nnzA= 1024, nnzB=1024 +tid=0, i,j = 474,20 nnzA= 1024, nnzB=1024 +tid=0, i,j = 788,16 nnzA= 1024, nnzB=1024 +tid=0, i,j = 955,11 nnzA= 1024, nnzB=1024 +tid=0, i,j = 183,7 nnzA= 1024, nnzB=1024 +tid=0, i,j = 960,54 nnzA= 1024, nnzB=1024 +tid=0, i,j = 893,57 nnzA= 1024, nnzB=1024 +tid=0, i,j = 476,49 nnzA= 1024, nnzB=1024 +tid=0, i,j = 940,53 nnzA= 1024, nnzB=1024 +tid=0, i,j = 590,61 nnzA= 1024, nnzB=1024 +tid=0, i,j = 
108,59 nnzA= 1024, nnzB=1024 +tid=0, i,j = 70,47 nnzA= 1024, nnzB=1024 +tid=0, i,j = 90,56 nnzA= 1024, nnzB=1024 +tid=0, i,j = 804,34 nnzA= 1024, nnzB=1024 +tid=0, i,j = 121,32 nnzA= 1024, nnzB=1024 +tid=0, i,j = 846,57 nnzA= 1024, nnzB=1024 +tid=0, i,j = 568,41 nnzA= 1024, nnzB=1024 +tid=0, i,j = 698,39 nnzA= 1024, nnzB=1024 +tid=0, i,j = 771,62 nnzA= 1024, nnzB=1024 +tid=0, i,j = 694,30 nnzA= 1024, nnzB=1024 +tid=0, i,j = 138,38 nnzA= 1024, nnzB=1024 +tid=0, i,j = 999,52 nnzA= 1024, nnzB=1024 +tid=0, i,j = 208,46 nnzA= 1024, nnzB=1024 +tid=0, i,j = 35,58 nnzA= 1024, nnzB=1024 +tid=0, i,j = 81,33 nnzA= 1024, nnzB=1024 +tid=0, i,j = 558,54 nnzA= 1024, nnzB=1024 +tid=0, i,j = 950,40 nnzA= 1024, nnzB=1024 +tid=0, i,j = 336,48 nnzA= 1024, nnzB=1024 +tid=0, i,j = 798,43 nnzA= 1024, nnzB=1024 +tid=0, i,j = 556,51 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,60 nnzA= 1024, nnzB=1024 +tid=0, i,j = 35,50 nnzA= 1024, nnzB=1024 +tid=0, i,j = 609,37 nnzA= 1024, nnzB=1024 +tid=0, i,j = 979,55 nnzA= 1024, nnzB=1024 +tid=0, i,j = 441,28 nnzA= 1024, nnzB=1024 +tid=0, i,j = 324,42 nnzA= 1024, nnzB=1024 +tid=0, i,j = 451,36 nnzA= 1024, nnzB=1024 +tid=0, i,j = 665,73 nnzA= 1024, nnzB=1024 +tid=0, i,j = 50,63 nnzA= 1024, nnzB=1024 +tid=0, i,j = 297,72 nnzA= 1024, nnzB=1024 +tid=0, i,j = 324,68 nnzA= 1024, nnzB=1024 +tid=0, i,j = 234,91 nnzA= 1024, nnzB=1024 +tid=0, i,j = 690,92 nnzA= 1024, nnzB=1024 +tid=0, i,j = 243,82 nnzA= 1024, nnzB=1024 +tid=0, i,j = 218,92 nnzA= 1024, nnzB=1024 +tid=0, i,j = 748,97 nnzA= 1024, nnzB=1024 +tid=0, i,j = 268,64 nnzA= 1024, nnzB=1024 +tid=0, i,j = 657,87 nnzA= 1024, nnzB=1024 +tid=0, i,j = 825,89 nnzA= 1024, nnzB=1024 +tid=0, i,j = 564,96 nnzA= 1024, nnzB=1024 +tid=0, i,j = 90,78 nnzA= 1024, nnzB=1024 +tid=0, i,j = 248,76 nnzA= 1024, nnzB=1024 +tid=0, i,j = 637,94 nnzA= 1024, nnzB=1024 +tid=0, i,j = 74,92 nnzA= 1024, nnzB=1024 +tid=0, i,j = 539,70 nnzA= 1024, nnzB=1024 +tid=0, i,j = 37,90 nnzA= 1024, nnzB=1024 +tid=0, i,j = 228,71 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 411,67 nnzA= 1024, nnzB=1024 +tid=0, i,j = 722,94 nnzA= 1024, nnzB=1024 +tid=0, i,j = 719,66 nnzA= 1024, nnzB=1024 +tid=0, i,j = 104,86 nnzA= 1024, nnzB=1024 +tid=0, i,j = 402,110 nnzA= 1024, nnzB=1024 +tid=0, i,j = 519,91 nnzA= 1024, nnzB=1024 +tid=0, i,j = 694,65 nnzA= 1024, nnzB=1024 +tid=0, i,j = 477,69 nnzA= 1024, nnzB=1024 +tid=0, i,j = 326,99 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,125 nnzA= 1024, nnzB=1024 +tid=0, i,j = 240,115 nnzA= 1024, nnzB=1024 +tid=0, i,j = 820,128 nnzA= 1024, nnzB=1024 +tid=0, i,j = 660,129 nnzA= 1024, nnzB=1024 +tid=0, i,j = 623,130 nnzA= 1024, nnzB=1024 +tid=0, i,j = 99,118 nnzA= 1024, nnzB=1024 +tid=0, i,j = 278,113 nnzA= 1024, nnzB=1024 +tid=0, i,j = 127,112 nnzA= 1024, nnzB=1024 +tid=0, i,j = 151,122 nnzA= 1024, nnzB=1024 +tid=0, i,j = 338,117 nnzA= 1024, nnzB=1024 +tid=0, i,j = 644,104 nnzA= 1024, nnzB=1024 +tid=0, i,j = 522,107 nnzA= 1024, nnzB=1024 +tid=0, i,j = 621,103 nnzA= 1024, nnzB=1024 +tid=0, i,j = 835,128 nnzA= 1024, nnzB=1024 +tid=0, i,j = 219,117 nnzA= 1024, nnzB=1024 +tid=0, i,j = 284,139 nnzA= 1024, nnzB=1024 +tid=0, i,j = 609,102 nnzA= 1024, nnzB=1024 +tid=0, i,j = 715,121 nnzA= 1024, nnzB=1024 +tid=0, i,j = 239,107 nnzA= 1024, nnzB=1024 +tid=0, i,j = 601,162 nnzA= 1024, nnzB=1024 +tid=0, i,j = 816,163 nnzA= 1024, nnzB=1024 +tid=0, i,j = 959,158 nnzA= 1024, nnzB=1024 +tid=0, i,j = 492,133 nnzA= 1024, nnzB=1024 +tid=0, i,j = 108,136 nnzA= 1024, nnzB=1024 +tid=0, i,j = 570,132 nnzA= 1024, nnzB=1024 +tid=0, i,j = 297,160 nnzA= 1024, nnzB=1024 +tid=0, i,j = 714,157 nnzA= 1024, nnzB=1024 +tid=0, i,j = 995,146 nnzA= 1024, nnzB=1024 +tid=0, i,j = 123,143 nnzA= 1024, nnzB=1024 +tid=0, i,j = 484,151 nnzA= 1024, nnzB=1024 +tid=0, i,j = 355,131 nnzA= 1024, nnzB=1024 +tid=0, i,j = 717,150 nnzA= 1024, nnzB=1024 +tid=0, i,j = 887,140 nnzA= 1024, nnzB=1024 +tid=0, i,j = 621,123 nnzA= 1024, nnzB=1024 +tid=0, i,j = 554,121 nnzA= 1024, nnzB=1024 +tid=0, i,j = 486,93 nnzA= 1024, nnzB=1024 +tid=0, i,j = 87,142 nnzA= 
1024, nnzB=1024 +tid=0, i,j = 194,146 nnzA= 1024, nnzB=1024 +tid=0, i,j = 281,100 nnzA= 1024, nnzB=1024 +tid=0, i,j = 477,120 nnzA= 1024, nnzB=1024 +tid=0, i,j = 552,145 nnzA= 1024, nnzB=1024 +tid=0, i,j = 754,81 nnzA= 1024, nnzB=1024 +tid=0, i,j = 884,109 nnzA= 1024, nnzB=1024 +tid=0, i,j = 433,77 nnzA= 1024, nnzB=1024 +tid=0, i,j = 131,108 nnzA= 1024, nnzB=1024 +tid=0, i,j = 595,174 nnzA= 1024, nnzB=1024 +tid=0, i,j = 253,84 nnzA= 1024, nnzB=1024 +tid=0, i,j = 295,135 nnzA= 1024, nnzB=1024 +tid=0, i,j = 855,75 nnzA= 1024, nnzB=1024 +tid=0, i,j = 652,106 nnzA= 1024, nnzB=1024 +tid=0, i,j = 807,130 nnzA= 1024, nnzB=1024 +tid=0, i,j = 953,192 nnzA= 1024, nnzB=1024 +tid=0, i,j = 644,192 nnzA= 1024, nnzB=1024 +tid=0, i,j = 321,168 nnzA= 1024, nnzB=1024 +tid=0, i,j = 15,171 nnzA= 1024, nnzB=1024 +tid=0, i,j = 997,187 nnzA= 1024, nnzB=1024 +tid=0, i,j = 990,190 nnzA= 1024, nnzB=1024 +tid=0, i,j = 111,189 nnzA= 1024, nnzB=1024 +tid=0, i,j = 675,182 nnzA= 1024, nnzB=1024 +tid=0, i,j = 879,167 nnzA= 1024, nnzB=1024 +tid=0, i,j = 949,149 nnzA= 1024, nnzB=1024 +tid=0, i,j = 17,177 nnzA= 1024, nnzB=1024 +tid=0, i,j = 290,156 nnzA= 1024, nnzB=1024 +tid=0, i,j = 450,185 nnzA= 1024, nnzB=1024 +tid=0, i,j = 801,166 nnzA= 1024, nnzB=1024 +tid=0, i,j = 36,126 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,176 nnzA= 1024, nnzB=1024 +tid=0, i,j = 143,184 nnzA= 1024, nnzB=1024 +tid=0, i,j = 830,180 nnzA= 1024, nnzB=1024 +tid=0, i,j = 357,148 nnzA= 1024, nnzB=1024 +tid=0, i,j = 253,131 nnzA= 1024, nnzB=1024 +tid=0, i,j = 519,114 nnzA= 1024, nnzB=1024 +tid=0, i,j = 108,176 nnzA= 1024, nnzB=1024 +tid=0, i,j = 932,179 nnzA= 1024, nnzB=1024 +tid=0, i,j = 779,112 nnzA= 1024, nnzB=1024 +tid=0, i,j = 834,137 nnzA= 1024, nnzB=1024 +tid=0, i,j = 288,138 nnzA= 1024, nnzB=1024 +tid=0, i,j = 198,116 nnzA= 1024, nnzB=1024 +tid=0, i,j = 612,170 nnzA= 1024, nnzB=1024 +tid=0, i,j = 905,111 nnzA= 1024, nnzB=1024 +tid=0, i,j = 821,134 nnzA= 1024, nnzB=1024 +tid=0, i,j = 221,164 nnzA= 1024, nnzB=1024 +tid=0, 
i,j = 910,198 nnzA= 1024, nnzB=1024 +tid=0, i,j = 863,202 nnzA= 1024, nnzB=1024 +tid=0, i,j = 692,183 nnzA= 1024, nnzB=1024 +tid=0, i,j = 779,186 nnzA= 1024, nnzB=1024 +tid=0, i,j = 974,157 nnzA= 1024, nnzB=1024 +tid=0, i,j = 859,197 nnzA= 1024, nnzB=1024 +tid=0, i,j = 922,195 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1001,182 nnzA= 1024, nnzB=1024 +tid=0, i,j = 396,165 nnzA= 1024, nnzB=1024 +tid=0, i,j = 199,141 nnzA= 1024, nnzB=1024 +tid=0, i,j = 225,142 nnzA= 1024, nnzB=1024 +tid=0, i,j = 951,172 nnzA= 1024, nnzB=1024 +tid=0, i,j = 0,174 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,144 nnzA= 1024, nnzB=1024 +tid=0, i,j = 907,201 nnzA= 1024, nnzB=1024 +tid=0, i,j = 945,139 nnzA= 1024, nnzB=1024 +tid=0, i,j = 901,169 nnzA= 1024, nnzB=1024 +tid=0, i,j = 135,193 nnzA= 1024, nnzB=1024 +tid=0, i,j = 71,211 nnzA= 1024, nnzB=1024 +tid=0, i,j = 865,203 nnzA= 1024, nnzB=1024 +tid=0, i,j = 357,188 nnzA= 1024, nnzB=1024 +tid=0, i,j = 704,220 nnzA= 1024, nnzB=1024 +tid=0, i,j = 550,216 nnzA= 1024, nnzB=1024 +tid=0, i,j = 545,222 nnzA= 1024, nnzB=1024 +tid=0, i,j = 704,218 nnzA= 1024, nnzB=1024 +tid=0, i,j = 426,212 nnzA= 1024, nnzB=1024 +tid=0, i,j = 721,224 nnzA= 1024, nnzB=1024 +tid=0, i,j = 935,225 nnzA= 1024, nnzB=1024 +tid=0, i,j = 268,215 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,204 nnzA= 1024, nnzB=1024 +tid=0, i,j = 66,219 nnzA= 1024, nnzB=1024 +tid=0, i,j = 808,209 nnzA= 1024, nnzB=1024 +tid=0, i,j = 922,218 nnzA= 1024, nnzB=1024 +tid=0, i,j = 781,176 nnzA= 1024, nnzB=1024 +tid=0, i,j = 520,234 nnzA= 1024, nnzB=1024 +tid=0, i,j = 382,230 nnzA= 1024, nnzB=1024 +tid=0, i,j = 535,229 nnzA= 1024, nnzB=1024 +tid=0, i,j = 551,231 nnzA= 1024, nnzB=1024 +tid=0, i,j = 826,207 nnzA= 1024, nnzB=1024 +tid=0, i,j = 631,178 nnzA= 1024, nnzB=1024 +tid=0, i,j = 137,194 nnzA= 1024, nnzB=1024 +tid=0, i,j = 56,221 nnzA= 1024, nnzB=1024 +tid=0, i,j = 188,176 nnzA= 1024, nnzB=1024 +tid=0, i,j = 921,217 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1016,223 nnzA= 1024, nnzB=1024 +tid=0, i,j = 928,214 nnzA= 
1024, nnzB=1024 +tid=0, i,j = 727,226 nnzA= 1024, nnzB=1024 +tid=0, i,j = 669,175 nnzA= 1024, nnzB=1024 +tid=0, i,j = 570,233 nnzA= 1024, nnzB=1024 +tid=0, i,j = 531,199 nnzA= 1024, nnzB=1024 +tid=0, i,j = 500,241 nnzA= 1024, nnzB=1024 +tid=0, i,j = 522,235 nnzA= 1024, nnzB=1024 +tid=0, i,j = 551,221 nnzA= 1024, nnzB=1024 +tid=0, i,j = 233,248 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1004,244 nnzA= 1024, nnzB=1024 +tid=0, i,j = 197,252 nnzA= 1024, nnzB=1024 +tid=0, i,j = 326,246 nnzA= 1024, nnzB=1024 +tid=0, i,j = 40,254 nnzA= 1024, nnzB=1024 +tid=0, i,j = 193,242 nnzA= 1024, nnzB=1024 +tid=0, i,j = 221,236 nnzA= 1024, nnzB=1024 +tid=0, i,j = 238,254 nnzA= 1024, nnzB=1024 +tid=0, i,j = 588,244 nnzA= 1024, nnzB=1024 +tid=0, i,j = 147,247 nnzA= 1024, nnzB=1024 +tid=0, i,j = 931,211 nnzA= 1024, nnzB=1024 +tid=0, i,j = 796,257 nnzA= 1024, nnzB=1024 +tid=0, i,j = 368,262 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,247 nnzA= 1024, nnzB=1024 +tid=0, i,j = 82,239 nnzA= 1024, nnzB=1024 +tid=0, i,j = 485,250 nnzA= 1024, nnzB=1024 +tid=0, i,j = 461,257 nnzA= 1024, nnzB=1024 +tid=0, i,j = 743,228 nnzA= 1024, nnzB=1024 +tid=0, i,j = 152,213 nnzA= 1024, nnzB=1024 +tid=0, i,j = 494,245 nnzA= 1024, nnzB=1024 +tid=0, i,j = 755,237 nnzA= 1024, nnzB=1024 +tid=0, i,j = 233,258 nnzA= 1024, nnzB=1024 +tid=0, i,j = 485,253 nnzA= 1024, nnzB=1024 +tid=0, i,j = 945,261 nnzA= 1024, nnzB=1024 +tid=0, i,j = 659,210 nnzA= 1024, nnzB=1024 +tid=0, i,j = 985,208 nnzA= 1024, nnzB=1024 +tid=0, i,j = 897,232 nnzA= 1024, nnzB=1024 +tid=0, i,j = 895,255 nnzA= 1024, nnzB=1024 +tid=0, i,j = 300,243 nnzA= 1024, nnzB=1024 +tid=0, i,j = 708,251 nnzA= 1024, nnzB=1024 +tid=0, i,j = 333,276 nnzA= 1024, nnzB=1024 +tid=0, i,j = 32,285 nnzA= 1024, nnzB=1024 +tid=0, i,j = 179,282 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,270 nnzA= 1024, nnzB=1024 +tid=0, i,j = 755,264 nnzA= 1024, nnzB=1024 +tid=0, i,j = 126,287 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,279 nnzA= 1024, nnzB=1024 +tid=0, i,j = 394,287 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 124,265 nnzA= 1024, nnzB=1024 +tid=0, i,j = 979,274 nnzA= 1024, nnzB=1024 +tid=0, i,j = 449,281 nnzA= 1024, nnzB=1024 +tid=0, i,j = 944,271 nnzA= 1024, nnzB=1024 +tid=0, i,j = 562,292 nnzA= 1024, nnzB=1024 +tid=0, i,j = 106,295 nnzA= 1024, nnzB=1024 +tid=0, i,j = 18,280 nnzA= 1024, nnzB=1024 +tid=0, i,j = 114,256 nnzA= 1024, nnzB=1024 +tid=0, i,j = 377,277 nnzA= 1024, nnzB=1024 +tid=0, i,j = 466,294 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1007,283 nnzA= 1024, nnzB=1024 +tid=0, i,j = 124,242 nnzA= 1024, nnzB=1024 +tid=0, i,j = 352,266 nnzA= 1024, nnzB=1024 +tid=0, i,j = 234,268 nnzA= 1024, nnzB=1024 +tid=0, i,j = 594,291 nnzA= 1024, nnzB=1024 +tid=0, i,j = 621,242 nnzA= 1024, nnzB=1024 +tid=0, i,j = 37,286 nnzA= 1024, nnzB=1024 +tid=0, i,j = 100,273 nnzA= 1024, nnzB=1024 +tid=0, i,j = 964,238 nnzA= 1024, nnzB=1024 +tid=0, i,j = 884,260 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1,295 nnzA= 1024, nnzB=1024 +tid=0, i,j = 388,240 nnzA= 1024, nnzB=1024 +tid=0, i,j = 848,288 nnzA= 1024, nnzB=1024 +tid=0, i,j = 595,284 nnzA= 1024, nnzB=1024 +tid=0, i,j = 571,314 nnzA= 1024, nnzB=1024 +tid=0, i,j = 264,307 nnzA= 1024, nnzB=1024 +tid=0, i,j = 355,311 nnzA= 1024, nnzB=1024 +tid=0, i,j = 298,315 nnzA= 1024, nnzB=1024 +tid=0, i,j = 109,296 nnzA= 1024, nnzB=1024 +tid=0, i,j = 741,315 nnzA= 1024, nnzB=1024 +tid=0, i,j = 30,302 nnzA= 1024, nnzB=1024 +tid=0, i,j = 183,296 nnzA= 1024, nnzB=1024 +tid=0, i,j = 328,309 nnzA= 1024, nnzB=1024 +tid=0, i,j = 554,300 nnzA= 1024, nnzB=1024 +tid=0, i,j = 357,310 nnzA= 1024, nnzB=1024 +tid=0, i,j = 622,305 nnzA= 1024, nnzB=1024 +tid=0, i,j = 317,290 nnzA= 1024, nnzB=1024 +tid=0, i,j = 28,308 nnzA= 1024, nnzB=1024 +tid=0, i,j = 627,309 nnzA= 1024, nnzB=1024 +tid=0, i,j = 61,312 nnzA= 1024, nnzB=1024 +tid=0, i,j = 595,318 nnzA= 1024, nnzB=1024 +tid=0, i,j = 235,322 nnzA= 1024, nnzB=1024 +tid=0, i,j = 924,270 nnzA= 1024, nnzB=1024 +tid=0, i,j = 126,319 nnzA= 1024, nnzB=1024 +tid=0, i,j = 245,296 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1006,297 
nnzA= 1024, nnzB=1024 +tid=0, i,j = 323,317 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,315 nnzA= 1024, nnzB=1024 +tid=0, i,j = 67,272 nnzA= 1024, nnzB=1024 +tid=0, i,j = 73,321 nnzA= 1024, nnzB=1024 +tid=0, i,j = 960,294 nnzA= 1024, nnzB=1024 +tid=0, i,j = 229,304 nnzA= 1024, nnzB=1024 +tid=0, i,j = 400,269 nnzA= 1024, nnzB=1024 +tid=0, i,j = 10,267 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,316 nnzA= 1024, nnzB=1024 +tid=0, i,j = 758,313 nnzA= 1024, nnzB=1024 +tid=0, i,j = 695,334 nnzA= 1024, nnzB=1024 +tid=0, i,j = 993,338 nnzA= 1024, nnzB=1024 +tid=0, i,j = 678,343 nnzA= 1024, nnzB=1024 +tid=0, i,j = 36,341 nnzA= 1024, nnzB=1024 +tid=0, i,j = 775,336 nnzA= 1024, nnzB=1024 +tid=0, i,j = 384,344 nnzA= 1024, nnzB=1024 +tid=0, i,j = 375,323 nnzA= 1024, nnzB=1024 +tid=0, i,j = 176,329 nnzA= 1024, nnzB=1024 +tid=0, i,j = 795,332 nnzA= 1024, nnzB=1024 +tid=0, i,j = 651,323 nnzA= 1024, nnzB=1024 +tid=0, i,j = 704,327 nnzA= 1024, nnzB=1024 +tid=0, i,j = 608,337 nnzA= 1024, nnzB=1024 +tid=0, i,j = 308,316 nnzA= 1024, nnzB=1024 +tid=0, i,j = 680,339 nnzA= 1024, nnzB=1024 +tid=0, i,j = 694,335 nnzA= 1024, nnzB=1024 +tid=0, i,j = 808,336 nnzA= 1024, nnzB=1024 +tid=0, i,j = 60,348 nnzA= 1024, nnzB=1024 +tid=0, i,j = 142,351 nnzA= 1024, nnzB=1024 +tid=0, i,j = 821,348 nnzA= 1024, nnzB=1024 +tid=0, i,j = 549,324 nnzA= 1024, nnzB=1024 +tid=0, i,j = 996,347 nnzA= 1024, nnzB=1024 +tid=0, i,j = 774,301 nnzA= 1024, nnzB=1024 +tid=0, i,j = 487,325 nnzA= 1024, nnzB=1024 +tid=0, i,j = 723,342 nnzA= 1024, nnzB=1024 +tid=0, i,j = 645,303 nnzA= 1024, nnzB=1024 +tid=0, i,j = 282,350 nnzA= 1024, nnzB=1024 +tid=0, i,j = 468,320 nnzA= 1024, nnzB=1024 +tid=0, i,j = 965,330 nnzA= 1024, nnzB=1024 +tid=0, i,j = 912,297 nnzA= 1024, nnzB=1024 +tid=0, i,j = 680,344 nnzA= 1024, nnzB=1024 +tid=0, i,j = 159,299 nnzA= 1024, nnzB=1024 +tid=0, i,j = 849,340 nnzA= 1024, nnzB=1024 +tid=0, i,j = 990,365 nnzA= 1024, nnzB=1024 +tid=0, i,j = 471,372 nnzA= 1024, nnzB=1024 +tid=0, i,j = 669,374 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 684,369 nnzA= 1024, nnzB=1024 +tid=0, i,j = 971,366 nnzA= 1024, nnzB=1024 +tid=0, i,j = 937,351 nnzA= 1024, nnzB=1024 +tid=0, i,j = 992,375 nnzA= 1024, nnzB=1024 +tid=0, i,j = 629,364 nnzA= 1024, nnzB=1024 +tid=0, i,j = 679,360 nnzA= 1024, nnzB=1024 +tid=0, i,j = 160,352 nnzA= 1024, nnzB=1024 +tid=0, i,j = 587,368 nnzA= 1024, nnzB=1024 +tid=0, i,j = 989,361 nnzA= 1024, nnzB=1024 +tid=0, i,j = 270,370 nnzA= 1024, nnzB=1024 +tid=0, i,j = 841,366 nnzA= 1024, nnzB=1024 +tid=0, i,j = 816,382 nnzA= 1024, nnzB=1024 +tid=0, i,j = 592,379 nnzA= 1024, nnzB=1024 +tid=0, i,j = 888,367 nnzA= 1024, nnzB=1024 +tid=0, i,j = 77,380 nnzA= 1024, nnzB=1024 +tid=0, i,j = 75,345 nnzA= 1024, nnzB=1024 +tid=0, i,j = 536,353 nnzA= 1024, nnzB=1024 +tid=0, i,j = 882,378 nnzA= 1024, nnzB=1024 +tid=0, i,j = 142,328 nnzA= 1024, nnzB=1024 +tid=0, i,j = 848,330 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1012,381 nnzA= 1024, nnzB=1024 +tid=0, i,j = 340,356 nnzA= 1024, nnzB=1024 +tid=0, i,j = 88,373 nnzA= 1024, nnzB=1024 +tid=0, i,j = 306,325 nnzA= 1024, nnzB=1024 +tid=0, i,j = 336,376 nnzA= 1024, nnzB=1024 +tid=0, i,j = 206,363 nnzA= 1024, nnzB=1024 +tid=0, i,j = 804,349 nnzA= 1024, nnzB=1024 +tid=0, i,j = 649,326 nnzA= 1024, nnzB=1024 +tid=0, i,j = 327,371 nnzA= 1024, nnzB=1024 +tid=0, i,j = 835,397 nnzA= 1024, nnzB=1024 +tid=0, i,j = 49,405 nnzA= 1024, nnzB=1024 +tid=0, i,j = 382,400 nnzA= 1024, nnzB=1024 +tid=0, i,j = 40,403 nnzA= 1024, nnzB=1024 +tid=0, i,j = 246,398 nnzA= 1024, nnzB=1024 +tid=0, i,j = 645,396 nnzA= 1024, nnzB=1024 +tid=0, i,j = 733,389 nnzA= 1024, nnzB=1024 +tid=0, i,j = 790,401 nnzA= 1024, nnzB=1024 +tid=0, i,j = 475,405 nnzA= 1024, nnzB=1024 +tid=0, i,j = 670,385 nnzA= 1024, nnzB=1024 +tid=0, i,j = 107,398 nnzA= 1024, nnzB=1024 +tid=0, i,j = 711,383 nnzA= 1024, nnzB=1024 +tid=0, i,j = 26,415 nnzA= 1024, nnzB=1024 +tid=0, i,j = 643,411 nnzA= 1024, nnzB=1024 +tid=0, i,j = 86,377 nnzA= 1024, nnzB=1024 +tid=0, i,j = 172,400 nnzA= 1024, nnzB=1024 +tid=0, i,j = 59,391 
nnzA= 1024, nnzB=1024 +tid=0, i,j = 537,386 nnzA= 1024, nnzB=1024 +tid=0, i,j = 949,412 nnzA= 1024, nnzB=1024 +tid=0, i,j = 794,361 nnzA= 1024, nnzB=1024 +tid=0, i,j = 436,399 nnzA= 1024, nnzB=1024 +tid=0, i,j = 816,362 nnzA= 1024, nnzB=1024 +tid=0, i,j = 754,410 nnzA= 1024, nnzB=1024 +tid=0, i,j = 641,404 nnzA= 1024, nnzB=1024 +tid=0, i,j = 991,414 nnzA= 1024, nnzB=1024 +tid=0, i,j = 494,388 nnzA= 1024, nnzB=1024 +tid=0, i,j = 352,355 nnzA= 1024, nnzB=1024 +tid=0, i,j = 320,407 nnzA= 1024, nnzB=1024 +tid=0, i,j = 692,394 nnzA= 1024, nnzB=1024 +tid=0, i,j = 678,358 nnzA= 1024, nnzB=1024 +tid=0, i,j = 643,380 nnzA= 1024, nnzB=1024 +tid=0, i,j = 320,402 nnzA= 1024, nnzB=1024 +tid=0, i,j = 377,426 nnzA= 1024, nnzB=1024 +tid=0, i,j = 105,437 nnzA= 1024, nnzB=1024 +tid=0, i,j = 51,431 nnzA= 1024, nnzB=1024 +tid=0, i,j = 152,435 nnzA= 1024, nnzB=1024 +tid=0, i,j = 950,428 nnzA= 1024, nnzB=1024 +tid=0, i,j = 857,432 nnzA= 1024, nnzB=1024 +tid=0, i,j = 263,425 nnzA= 1024, nnzB=1024 +tid=0, i,j = 748,420 nnzA= 1024, nnzB=1024 +tid=0, i,j = 366,417 nnzA= 1024, nnzB=1024 +tid=0, i,j = 575,416 nnzA= 1024, nnzB=1024 +tid=0, i,j = 970,447 nnzA= 1024, nnzB=1024 +tid=0, i,j = 753,443 nnzA= 1024, nnzB=1024 +tid=0, i,j = 814,438 nnzA= 1024, nnzB=1024 +tid=0, i,j = 61,408 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,422 nnzA= 1024, nnzB=1024 +tid=0, i,j = 149,427 nnzA= 1024, nnzB=1024 +tid=0, i,j = 160,418 nnzA= 1024, nnzB=1024 +tid=0, i,j = 669,444 nnzA= 1024, nnzB=1024 +tid=0, i,j = 551,390 nnzA= 1024, nnzB=1024 +tid=0, i,j = 718,430 nnzA= 1024, nnzB=1024 +tid=0, i,j = 880,442 nnzA= 1024, nnzB=1024 +tid=0, i,j = 305,429 nnzA= 1024, nnzB=1024 +tid=0, i,j = 356,436 nnzA= 1024, nnzB=1024 +tid=0, i,j = 600,391 nnzA= 1024, nnzB=1024 +tid=0, i,j = 209,419 nnzA= 1024, nnzB=1024 +tid=0, i,j = 328,389 nnzA= 1024, nnzB=1024 +tid=0, i,j = 741,446 nnzA= 1024, nnzB=1024 +tid=0, i,j = 542,424 nnzA= 1024, nnzB=1024 +tid=0, i,j = 347,387 nnzA= 1024, nnzB=1024 +tid=0, i,j = 338,440 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 94,413 nnzA= 1024, nnzB=1024 +tid=0, i,j = 604,434 nnzA= 1024, nnzB=1024 +tid=0, i,j = 985,456 nnzA= 1024, nnzB=1024 +tid=0, i,j = 849,456 nnzA= 1024, nnzB=1024 +tid=0, i,j = 868,452 nnzA= 1024, nnzB=1024 +tid=0, i,j = 646,448 nnzA= 1024, nnzB=1024 +tid=0, i,j = 744,448 nnzA= 1024, nnzB=1024 +tid=0, i,j = 415,454 nnzA= 1024, nnzB=1024 +tid=0, i,j = 982,441 nnzA= 1024, nnzB=1024 +tid=0, i,j = 218,457 nnzA= 1024, nnzB=1024 +tid=0, i,j = 835,449 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,421 nnzA= 1024, nnzB=1024 +tid=0, i,j = 147,451 nnzA= 1024, nnzB=1024 +tid=0, i,j = 873,423 nnzA= 1024, nnzB=1024 +tid=0, i,j = 285,419 nnzA= 1024, nnzB=1024 +tid=0, i,j = 43,455 nnzA= 1024, nnzB=1024 +tid=0, i,j = 669,418 nnzA= 1024, nnzB=1024 +tid=0, i,j = 952,445 nnzA= 1024, nnzB=1024 +tid=0, i,j = 26,453 nnzA= 1024, nnzB=1024 +tid=0, i,j = 668,454 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1017,451 nnzA= 1024, nnzB=1024 +tid=0, i,j = 579,450 nnzA= 1024, nnzB=1024 +tid=0, i,j = 849,461 nnzA= 1024, nnzB=1024 +tid=0, i,j = 801,465 nnzA= 1024, nnzB=1024 +tid=0, i,j = 900,462 nnzA= 1024, nnzB=1024 +tid=0, i,j = 112,467 nnzA= 1024, nnzB=1024 +tid=0, i,j = 917,461 nnzA= 1024, nnzB=1024 +tid=0, i,j = 316,463 nnzA= 1024, nnzB=1024 +tid=0, i,j = 510,458 nnzA= 1024, nnzB=1024 +tid=0, i,j = 836,460 nnzA= 1024, nnzB=1024 +tid=0, i,j = 889,470 nnzA= 1024, nnzB=1024 +tid=0, i,j = 762,464 nnzA= 1024, nnzB=1024 +tid=0, i,j = 737,459 nnzA= 1024, nnzB=1024 +tid=0, i,j = 673,466 nnzA= 1024, nnzB=1024 +tid=0, i,j = 355,465 nnzA= 1024, nnzB=1024 +tid=0, i,j = 288,468 nnzA= 1024, nnzB=1024 +tid=0, i,j = 680,482 nnzA= 1024, nnzB=1024 +tid=0, i,j = 802,489 nnzA= 1024, nnzB=1024 +tid=0, i,j = 194,487 nnzA= 1024, nnzB=1024 +tid=0, i,j = 558,481 nnzA= 1024, nnzB=1024 +tid=0, i,j = 811,490 nnzA= 1024, nnzB=1024 +tid=0, i,j = 44,476 nnzA= 1024, nnzB=1024 +tid=0, i,j = 667,480 nnzA= 1024, nnzB=1024 +tid=0, i,j = 650,471 nnzA= 1024, nnzB=1024 +tid=0, i,j = 382,475 nnzA= 1024, nnzB=1024 +tid=0, i,j = 274,485 
nnzA= 1024, nnzB=1024 +tid=0, i,j = 342,477 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1015,486 nnzA= 1024, nnzB=1024 +tid=0, i,j = 786,478 nnzA= 1024, nnzB=1024 +tid=0, i,j = 127,473 nnzA= 1024, nnzB=1024 +tid=0, i,j = 517,483 nnzA= 1024, nnzB=1024 +tid=0, i,j = 121,473 nnzA= 1024, nnzB=1024 +tid=0, i,j = 487,474 nnzA= 1024, nnzB=1024 +tid=0, i,j = 961,484 nnzA= 1024, nnzB=1024 +tid=0, i,j = 319,491 nnzA= 1024, nnzB=1024 +tid=0, i,j = 377,492 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1013,521 nnzA= 1024, nnzB=1024 +tid=0, i,j = 267,496 nnzA= 1024, nnzB=1024 +tid=0, i,j = 809,495 nnzA= 1024, nnzB=1024 +tid=0, i,j = 194,498 nnzA= 1024, nnzB=1024 +tid=0, i,j = 704,501 nnzA= 1024, nnzB=1024 +tid=0, i,j = 362,525 nnzA= 1024, nnzB=1024 +tid=0, i,j = 84,500 nnzA= 1024, nnzB=1024 +tid=0, i,j = 284,522 nnzA= 1024, nnzB=1024 +tid=0, i,j = 270,520 nnzA= 1024, nnzB=1024 +tid=0, i,j = 432,494 nnzA= 1024, nnzB=1024 +tid=0, i,j = 94,525 nnzA= 1024, nnzB=1024 +tid=0, i,j = 271,513 nnzA= 1024, nnzB=1024 +tid=0, i,j = 61,527 nnzA= 1024, nnzB=1024 +tid=0, i,j = 758,515 nnzA= 1024, nnzB=1024 +tid=0, i,j = 404,514 nnzA= 1024, nnzB=1024 +tid=0, i,j = 643,507 nnzA= 1024, nnzB=1024 +tid=0, i,j = 369,517 nnzA= 1024, nnzB=1024 +tid=0, i,j = 887,504 nnzA= 1024, nnzB=1024 +tid=0, i,j = 945,524 nnzA= 1024, nnzB=1024 +tid=0, i,j = 952,499 nnzA= 1024, nnzB=1024 +tid=0, i,j = 519,503 nnzA= 1024, nnzB=1024 +tid=0, i,j = 892,512 nnzA= 1024, nnzB=1024 +tid=0, i,j = 510,504 nnzA= 1024, nnzB=1024 +tid=0, i,j = 902,496 nnzA= 1024, nnzB=1024 +tid=0, i,j = 449,508 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,505 nnzA= 1024, nnzB=1024 +tid=0, i,j = 52,526 nnzA= 1024, nnzB=1024 +tid=0, i,j = 293,518 nnzA= 1024, nnzB=1024 +tid=0, i,j = 632,523 nnzA= 1024, nnzB=1024 +tid=0, i,j = 786,519 nnzA= 1024, nnzB=1024 +tid=0, i,j = 294,528 nnzA= 1024, nnzB=1024 +tid=0, i,j = 505,534 nnzA= 1024, nnzB=1024 +tid=0, i,j = 908,531 nnzA= 1024, nnzB=1024 +tid=0, i,j = 137,542 nnzA= 1024, nnzB=1024 +tid=0, i,j = 500,536 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 710,552 nnzA= 1024, nnzB=1024 +tid=0, i,j = 910,557 nnzA= 1024, nnzB=1024 +tid=0, i,j = 998,529 nnzA= 1024, nnzB=1024 +tid=0, i,j = 792,560 nnzA= 1024, nnzB=1024 +tid=0, i,j = 767,547 nnzA= 1024, nnzB=1024 +tid=0, i,j = 927,550 nnzA= 1024, nnzB=1024 +tid=0, i,j = 391,553 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1018,547 nnzA= 1024, nnzB=1024 +tid=0, i,j = 46,548 nnzA= 1024, nnzB=1024 +tid=0, i,j = 26,556 nnzA= 1024, nnzB=1024 +tid=0, i,j = 660,535 nnzA= 1024, nnzB=1024 +tid=0, i,j = 861,538 nnzA= 1024, nnzB=1024 +tid=0, i,j = 10,555 nnzA= 1024, nnzB=1024 +tid=0, i,j = 492,538 nnzA= 1024, nnzB=1024 +tid=0, i,j = 112,530 nnzA= 1024, nnzB=1024 +tid=0, i,j = 799,537 nnzA= 1024, nnzB=1024 +tid=0, i,j = 145,529 nnzA= 1024, nnzB=1024 +tid=0, i,j = 674,533 nnzA= 1024, nnzB=1024 +tid=0, i,j = 776,535 nnzA= 1024, nnzB=1024 +tid=0, i,j = 912,547 nnzA= 1024, nnzB=1024 +tid=0, i,j = 552,558 nnzA= 1024, nnzB=1024 +tid=0, i,j = 245,540 nnzA= 1024, nnzB=1024 +tid=0, i,j = 658,545 nnzA= 1024, nnzB=1024 +tid=0, i,j = 351,554 nnzA= 1024, nnzB=1024 +tid=0, i,j = 697,548 nnzA= 1024, nnzB=1024 +tid=0, i,j = 213,546 nnzA= 1024, nnzB=1024 +tid=0, i,j = 602,549 nnzA= 1024, nnzB=1024 +tid=0, i,j = 597,561 nnzA= 1024, nnzB=1024 +tid=0, i,j = 490,565 nnzA= 1024, nnzB=1024 +tid=0, i,j = 872,569 nnzA= 1024, nnzB=1024 +tid=0, i,j = 603,574 nnzA= 1024, nnzB=1024 +tid=0, i,j = 914,588 nnzA= 1024, nnzB=1024 +tid=0, i,j = 310,564 nnzA= 1024, nnzB=1024 +tid=0, i,j = 182,580 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,582 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,563 nnzA= 1024, nnzB=1024 +tid=0, i,j = 118,583 nnzA= 1024, nnzB=1024 +tid=0, i,j = 271,571 nnzA= 1024, nnzB=1024 +tid=0, i,j = 564,566 nnzA= 1024, nnzB=1024 +tid=0, i,j = 919,571 nnzA= 1024, nnzB=1024 +tid=0, i,j = 862,562 nnzA= 1024, nnzB=1024 +tid=0, i,j = 254,587 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1016,589 nnzA= 1024, nnzB=1024 +tid=0, i,j = 802,590 nnzA= 1024, nnzB=1024 +tid=0, i,j = 609,564 nnzA= 1024, nnzB=1024 +tid=0, i,j = 
607,566 nnzA= 1024, nnzB=1024 +tid=0, i,j = 445,581 nnzA= 1024, nnzB=1024 +tid=0, i,j = 182,562 nnzA= 1024, nnzB=1024 +tid=0, i,j = 95,590 nnzA= 1024, nnzB=1024 +tid=0, i,j = 630,572 nnzA= 1024, nnzB=1024 +tid=0, i,j = 433,585 nnzA= 1024, nnzB=1024 +tid=0, i,j = 399,584 nnzA= 1024, nnzB=1024 +tid=0, i,j = 274,579 nnzA= 1024, nnzB=1024 +tid=0, i,j = 465,570 nnzA= 1024, nnzB=1024 +tid=0, i,j = 256,576 nnzA= 1024, nnzB=1024 +tid=0, i,j = 787,595 nnzA= 1024, nnzB=1024 +tid=0, i,j = 759,611 nnzA= 1024, nnzB=1024 +tid=0, i,j = 127,599 nnzA= 1024, nnzB=1024 +tid=0, i,j = 832,608 nnzA= 1024, nnzB=1024 +tid=0, i,j = 527,592 nnzA= 1024, nnzB=1024 +tid=0, i,j = 76,605 nnzA= 1024, nnzB=1024 +tid=0, i,j = 478,601 nnzA= 1024, nnzB=1024 +tid=0, i,j = 794,610 nnzA= 1024, nnzB=1024 +tid=0, i,j = 270,604 nnzA= 1024, nnzB=1024 +tid=0, i,j = 430,594 nnzA= 1024, nnzB=1024 +tid=0, i,j = 143,593 nnzA= 1024, nnzB=1024 +tid=0, i,j = 5,620 nnzA= 1024, nnzB=1024 +tid=0, i,j = 928,616 nnzA= 1024, nnzB=1024 +tid=0, i,j = 41,621 nnzA= 1024, nnzB=1024 +tid=0, i,j = 788,598 nnzA= 1024, nnzB=1024 +tid=0, i,j = 996,619 nnzA= 1024, nnzB=1024 +tid=0, i,j = 978,591 nnzA= 1024, nnzB=1024 +tid=0, i,j = 398,613 nnzA= 1024, nnzB=1024 +tid=0, i,j = 287,609 nnzA= 1024, nnzB=1024 +tid=0, i,j = 79,623 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1006,612 nnzA= 1024, nnzB=1024 +tid=0, i,j = 339,600 nnzA= 1024, nnzB=1024 +tid=0, i,j = 386,614 nnzA= 1024, nnzB=1024 +tid=0, i,j = 44,623 nnzA= 1024, nnzB=1024 +tid=0, i,j = 218,602 nnzA= 1024, nnzB=1024 +tid=0, i,j = 337,620 nnzA= 1024, nnzB=1024 +tid=0, i,j = 677,596 nnzA= 1024, nnzB=1024 +tid=0, i,j = 759,603 nnzA= 1024, nnzB=1024 +tid=0, i,j = 30,617 nnzA= 1024, nnzB=1024 +tid=0, i,j = 930,606 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,615 nnzA= 1024, nnzB=1024 +tid=0, i,j = 361,618 nnzA= 1024, nnzB=1024 +tid=0, i,j = 113,638 nnzA= 1024, nnzB=1024 +tid=0, i,j = 396,628 nnzA= 1024, nnzB=1024 +tid=0, i,j = 73,632 nnzA= 1024, nnzB=1024 +tid=0, i,j = 242,624 nnzA= 1024, 
nnzB=1024 +tid=0, i,j = 614,649 nnzA= 1024, nnzB=1024 +tid=0, i,j = 19,624 nnzA= 1024, nnzB=1024 +tid=0, i,j = 957,650 nnzA= 1024, nnzB=1024 +tid=0, i,j = 617,636 nnzA= 1024, nnzB=1024 +tid=0, i,j = 341,630 nnzA= 1024, nnzB=1024 +tid=0, i,j = 716,637 nnzA= 1024, nnzB=1024 +tid=0, i,j = 285,637 nnzA= 1024, nnzB=1024 +tid=0, i,j = 361,627 nnzA= 1024, nnzB=1024 +tid=0, i,j = 683,625 nnzA= 1024, nnzB=1024 +tid=0, i,j = 912,634 nnzA= 1024, nnzB=1024 +tid=0, i,j = 966,623 nnzA= 1024, nnzB=1024 +tid=0, i,j = 934,643 nnzA= 1024, nnzB=1024 +tid=0, i,j = 882,629 nnzA= 1024, nnzB=1024 +tid=0, i,j = 49,631 nnzA= 1024, nnzB=1024 +tid=0, i,j = 616,639 nnzA= 1024, nnzB=1024 +tid=0, i,j = 973,652 nnzA= 1024, nnzB=1024 +tid=0, i,j = 60,653 nnzA= 1024, nnzB=1024 +tid=0, i,j = 524,624 nnzA= 1024, nnzB=1024 +tid=0, i,j = 367,638 nnzA= 1024, nnzB=1024 +tid=0, i,j = 783,647 nnzA= 1024, nnzB=1024 +tid=0, i,j = 457,641 nnzA= 1024, nnzB=1024 +tid=0, i,j = 585,631 nnzA= 1024, nnzB=1024 +tid=0, i,j = 195,648 nnzA= 1024, nnzB=1024 +tid=0, i,j = 837,640 nnzA= 1024, nnzB=1024 +tid=0, i,j = 333,653 nnzA= 1024, nnzB=1024 +tid=0, i,j = 281,651 nnzA= 1024, nnzB=1024 +tid=0, i,j = 51,626 nnzA= 1024, nnzB=1024 +tid=0, i,j = 882,635 nnzA= 1024, nnzB=1024 +tid=0, i,j = 158,664 nnzA= 1024, nnzB=1024 +tid=0, i,j = 300,669 nnzA= 1024, nnzB=1024 +tid=0, i,j = 454,675 nnzA= 1024, nnzB=1024 +tid=0, i,j = 503,666 nnzA= 1024, nnzB=1024 +tid=0, i,j = 699,681 nnzA= 1024, nnzB=1024 +tid=0, i,j = 819,672 nnzA= 1024, nnzB=1024 +tid=0, i,j = 910,655 nnzA= 1024, nnzB=1024 +tid=0, i,j = 349,656 nnzA= 1024, nnzB=1024 +tid=0, i,j = 141,671 nnzA= 1024, nnzB=1024 +tid=0, i,j = 819,674 nnzA= 1024, nnzB=1024 +tid=0, i,j = 36,674 nnzA= 1024, nnzB=1024 +tid=0, i,j = 767,663 nnzA= 1024, nnzB=1024 +tid=0, i,j = 77,665 nnzA= 1024, nnzB=1024 +tid=0, i,j = 117,682 nnzA= 1024, nnzB=1024 +tid=0, i,j = 591,660 nnzA= 1024, nnzB=1024 +tid=0, i,j = 951,667 nnzA= 1024, nnzB=1024 +tid=0, i,j = 17,685 nnzA= 1024, nnzB=1024 +tid=0, i,j = 
605,654 nnzA= 1024, nnzB=1024 +tid=0, i,j = 907,684 nnzA= 1024, nnzB=1024 +tid=0, i,j = 899,679 nnzA= 1024, nnzB=1024 +tid=0, i,j = 289,677 nnzA= 1024, nnzB=1024 +tid=0, i,j = 110,683 nnzA= 1024, nnzB=1024 +tid=0, i,j = 202,686 nnzA= 1024, nnzB=1024 +tid=0, i,j = 17,659 nnzA= 1024, nnzB=1024 +tid=0, i,j = 512,661 nnzA= 1024, nnzB=1024 +tid=0, i,j = 647,677 nnzA= 1024, nnzB=1024 +tid=0, i,j = 398,680 nnzA= 1024, nnzB=1024 +tid=0, i,j = 802,678 nnzA= 1024, nnzB=1024 +tid=0, i,j = 565,671 nnzA= 1024, nnzB=1024 +tid=0, i,j = 242,676 nnzA= 1024, nnzB=1024 +tid=0, i,j = 365,668 nnzA= 1024, nnzB=1024 +tid=0, i,j = 390,681 nnzA= 1024, nnzB=1024 +tid=0, i,j = 323,701 nnzA= 1024, nnzB=1024 +tid=0, i,j = 702,695 nnzA= 1024, nnzB=1024 +tid=0, i,j = 76,707 nnzA= 1024, nnzB=1024 +tid=0, i,j = 502,689 nnzA= 1024, nnzB=1024 +tid=0, i,j = 287,688 nnzA= 1024, nnzB=1024 +tid=0, i,j = 631,698 nnzA= 1024, nnzB=1024 +tid=0, i,j = 102,696 nnzA= 1024, nnzB=1024 +tid=0, i,j = 374,704 nnzA= 1024, nnzB=1024 +tid=0, i,j = 511,706 nnzA= 1024, nnzB=1024 +tid=0, i,j = 579,705 nnzA= 1024, nnzB=1024 +tid=0, i,j = 378,694 nnzA= 1024, nnzB=1024 +tid=0, i,j = 777,702 nnzA= 1024, nnzB=1024 +tid=0, i,j = 462,711 nnzA= 1024, nnzB=1024 +tid=0, i,j = 392,691 nnzA= 1024, nnzB=1024 +tid=0, i,j = 152,699 nnzA= 1024, nnzB=1024 +tid=0, i,j = 45,687 nnzA= 1024, nnzB=1024 +tid=0, i,j = 475,714 nnzA= 1024, nnzB=1024 +tid=0, i,j = 872,709 nnzA= 1024, nnzB=1024 +tid=0, i,j = 299,690 nnzA= 1024, nnzB=1024 +tid=0, i,j = 600,692 nnzA= 1024, nnzB=1024 +tid=0, i,j = 172,715 nnzA= 1024, nnzB=1024 +tid=0, i,j = 107,710 nnzA= 1024, nnzB=1024 +tid=0, i,j = 293,710 nnzA= 1024, nnzB=1024 +tid=0, i,j = 925,708 nnzA= 1024, nnzB=1024 +tid=0, i,j = 873,709 nnzA= 1024, nnzB=1024 +tid=0, i,j = 840,700 nnzA= 1024, nnzB=1024 +tid=0, i,j = 259,708 nnzA= 1024, nnzB=1024 +tid=0, i,j = 210,711 nnzA= 1024, nnzB=1024 +tid=0, i,j = 132,703 nnzA= 1024, nnzB=1024 +tid=0, i,j = 697,716 nnzA= 1024, nnzB=1024 +tid=0, i,j = 480,724 nnzA= 1024, 
nnzB=1024 +tid=0, i,j = 797,728 nnzA= 1024, nnzB=1024 +tid=0, i,j = 337,726 nnzA= 1024, nnzB=1024 +tid=0, i,j = 884,729 nnzA= 1024, nnzB=1024 +tid=0, i,j = 848,718 nnzA= 1024, nnzB=1024 +tid=0, i,j = 485,742 nnzA= 1024, nnzB=1024 +tid=0, i,j = 201,720 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,734 nnzA= 1024, nnzB=1024 +tid=0, i,j = 912,737 nnzA= 1024, nnzB=1024 +tid=0, i,j = 146,737 nnzA= 1024, nnzB=1024 +tid=0, i,j = 751,715 nnzA= 1024, nnzB=1024 +tid=0, i,j = 630,741 nnzA= 1024, nnzB=1024 +tid=0, i,j = 910,732 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1023,738 nnzA= 1024, nnzB=1024 +tid=0, i,j = 806,721 nnzA= 1024, nnzB=1024 +tid=0, i,j = 335,739 nnzA= 1024, nnzB=1024 +tid=0, i,j = 596,740 nnzA= 1024, nnzB=1024 +tid=0, i,j = 675,723 nnzA= 1024, nnzB=1024 +tid=0, i,j = 934,722 nnzA= 1024, nnzB=1024 +tid=0, i,j = 725,720 nnzA= 1024, nnzB=1024 +tid=0, i,j = 331,719 nnzA= 1024, nnzB=1024 +tid=0, i,j = 275,731 nnzA= 1024, nnzB=1024 +tid=0, i,j = 415,722 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,727 nnzA= 1024, nnzB=1024 +tid=0, i,j = 365,741 nnzA= 1024, nnzB=1024 +tid=0, i,j = 620,717 nnzA= 1024, nnzB=1024 +tid=0, i,j = 763,733 nnzA= 1024, nnzB=1024 +tid=0, i,j = 234,717 nnzA= 1024, nnzB=1024 +tid=0, i,j = 268,735 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,736 nnzA= 1024, nnzB=1024 +tid=0, i,j = 767,730 nnzA= 1024, nnzB=1024 +tid=0, i,j = 621,745 nnzA= 1024, nnzB=1024 +tid=0, i,j = 592,753 nnzA= 1024, nnzB=1024 +tid=0, i,j = 847,758 nnzA= 1024, nnzB=1024 +tid=0, i,j = 56,773 nnzA= 1024, nnzB=1024 +tid=0, i,j = 472,753 nnzA= 1024, nnzB=1024 +tid=0, i,j = 91,747 nnzA= 1024, nnzB=1024 +tid=0, i,j = 633,756 nnzA= 1024, nnzB=1024 +tid=0, i,j = 936,766 nnzA= 1024, nnzB=1024 +tid=0, i,j = 954,763 nnzA= 1024, nnzB=1024 +tid=0, i,j = 487,771 nnzA= 1024, nnzB=1024 +tid=0, i,j = 381,760 nnzA= 1024, nnzB=1024 +tid=0, i,j = 59,749 nnzA= 1024, nnzB=1024 +tid=0, i,j = 742,752 nnzA= 1024, nnzB=1024 +tid=0, i,j = 915,766 nnzA= 1024, nnzB=1024 +tid=0, i,j = 745,752 nnzA= 1024, nnzB=1024 +tid=0, i,j 
= 186,743 nnzA= 1024, nnzB=1024 +tid=0, i,j = 458,770 nnzA= 1024, nnzB=1024 +tid=0, i,j = 755,749 nnzA= 1024, nnzB=1024 +tid=0, i,j = 340,760 nnzA= 1024, nnzB=1024 +tid=0, i,j = 307,768 nnzA= 1024, nnzB=1024 +tid=0, i,j = 313,752 nnzA= 1024, nnzB=1024 +tid=0, i,j = 372,767 nnzA= 1024, nnzB=1024 +tid=0, i,j = 348,751 nnzA= 1024, nnzB=1024 +tid=0, i,j = 103,771 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1007,754 nnzA= 1024, nnzB=1024 +tid=0, i,j = 886,748 nnzA= 1024, nnzB=1024 +tid=0, i,j = 645,745 nnzA= 1024, nnzB=1024 +tid=0, i,j = 273,746 nnzA= 1024, nnzB=1024 +tid=0, i,j = 913,764 nnzA= 1024, nnzB=1024 +tid=0, i,j = 392,764 nnzA= 1024, nnzB=1024 +tid=0, i,j = 500,759 nnzA= 1024, nnzB=1024 +tid=0, i,j = 962,762 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,775 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1022,804 nnzA= 1024, nnzB=1024 +tid=0, i,j = 462,786 nnzA= 1024, nnzB=1024 +tid=0, i,j = 696,789 nnzA= 1024, nnzB=1024 +tid=0, i,j = 816,785 nnzA= 1024, nnzB=1024 +tid=0, i,j = 893,778 nnzA= 1024, nnzB=1024 +tid=0, i,j = 273,788 nnzA= 1024, nnzB=1024 +tid=0, i,j = 896,803 nnzA= 1024, nnzB=1024 +tid=0, i,j = 256,780 nnzA= 1024, nnzB=1024 +tid=0, i,j = 880,797 nnzA= 1024, nnzB=1024 +tid=0, i,j = 809,793 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,783 nnzA= 1024, nnzB=1024 +tid=0, i,j = 636,792 nnzA= 1024, nnzB=1024 +tid=0, i,j = 899,781 nnzA= 1024, nnzB=1024 +tid=0, i,j = 793,791 nnzA= 1024, nnzB=1024 +tid=0, i,j = 54,784 nnzA= 1024, nnzB=1024 +tid=0, i,j = 347,796 nnzA= 1024, nnzB=1024 +tid=0, i,j = 773,774 nnzA= 1024, nnzB=1024 +tid=0, i,j = 399,782 nnzA= 1024, nnzB=1024 +tid=0, i,j = 876,787 nnzA= 1024, nnzB=1024 +tid=0, i,j = 802,798 nnzA= 1024, nnzB=1024 +tid=0, i,j = 294,799 nnzA= 1024, nnzB=1024 +tid=0, i,j = 892,782 nnzA= 1024, nnzB=1024 +tid=0, i,j = 970,800 nnzA= 1024, nnzB=1024 +tid=0, i,j = 537,776 nnzA= 1024, nnzB=1024 +tid=0, i,j = 130,801 nnzA= 1024, nnzB=1024 +tid=0, i,j = 644,779 nnzA= 1024, nnzB=1024 +tid=0, i,j = 392,777 nnzA= 1024, nnzB=1024 +tid=0, i,j = 986,794 nnzA= 
1024, nnzB=1024 +tid=0, i,j = 471,790 nnzA= 1024, nnzB=1024 +tid=0, i,j = 955,792 nnzA= 1024, nnzB=1024 +tid=0, i,j = 656,795 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,805 nnzA= 1024, nnzB=1024 +tid=0, i,j = 985,818 nnzA= 1024, nnzB=1024 +tid=0, i,j = 430,817 nnzA= 1024, nnzB=1024 +tid=0, i,j = 839,821 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,835 nnzA= 1024, nnzB=1024 +tid=0, i,j = 646,810 nnzA= 1024, nnzB=1024 +tid=0, i,j = 747,822 nnzA= 1024, nnzB=1024 +tid=0, i,j = 625,827 nnzA= 1024, nnzB=1024 +tid=0, i,j = 982,831 nnzA= 1024, nnzB=1024 +tid=0, i,j = 719,809 nnzA= 1024, nnzB=1024 +tid=0, i,j = 408,815 nnzA= 1024, nnzB=1024 +tid=0, i,j = 547,811 nnzA= 1024, nnzB=1024 +tid=0, i,j = 295,834 nnzA= 1024, nnzB=1024 +tid=0, i,j = 915,830 nnzA= 1024, nnzB=1024 +tid=0, i,j = 902,816 nnzA= 1024, nnzB=1024 +tid=0, i,j = 406,825 nnzA= 1024, nnzB=1024 +tid=0, i,j = 886,824 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1003,832 nnzA= 1024, nnzB=1024 +tid=0, i,j = 375,812 nnzA= 1024, nnzB=1024 +tid=0, i,j = 362,833 nnzA= 1024, nnzB=1024 +tid=0, i,j = 200,813 nnzA= 1024, nnzB=1024 +tid=0, i,j = 737,809 nnzA= 1024, nnzB=1024 +tid=0, i,j = 889,806 nnzA= 1024, nnzB=1024 +tid=0, i,j = 32,805 nnzA= 1024, nnzB=1024 +tid=0, i,j = 688,819 nnzA= 1024, nnzB=1024 +tid=0, i,j = 999,833 nnzA= 1024, nnzB=1024 +tid=0, i,j = 504,807 nnzA= 1024, nnzB=1024 +tid=0, i,j = 39,823 nnzA= 1024, nnzB=1024 +tid=0, i,j = 136,834 nnzA= 1024, nnzB=1024 +tid=0, i,j = 814,826 nnzA= 1024, nnzB=1024 +tid=0, i,j = 407,828 nnzA= 1024, nnzB=1024 +tid=0, i,j = 511,829 nnzA= 1024, nnzB=1024 +tid=0, i,j = 565,837 nnzA= 1024, nnzB=1024 +tid=0, i,j = 741,847 nnzA= 1024, nnzB=1024 +tid=0, i,j = 483,848 nnzA= 1024, nnzB=1024 +tid=0, i,j = 972,865 nnzA= 1024, nnzB=1024 +tid=0, i,j = 636,841 nnzA= 1024, nnzB=1024 +tid=0, i,j = 994,858 nnzA= 1024, nnzB=1024 +tid=0, i,j = 302,850 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1022,864 nnzA= 1024, nnzB=1024 +tid=0, i,j = 694,845 nnzA= 1024, nnzB=1024 +tid=0, i,j = 567,851 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 533,846 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1014,856 nnzA= 1024, nnzB=1024 +tid=0, i,j = 159,840 nnzA= 1024, nnzB=1024 +tid=0, i,j = 781,858 nnzA= 1024, nnzB=1024 +tid=0, i,j = 136,842 nnzA= 1024, nnzB=1024 +tid=0, i,j = 529,852 nnzA= 1024, nnzB=1024 +tid=0, i,j = 103,841 nnzA= 1024, nnzB=1024 +tid=0, i,j = 524,843 nnzA= 1024, nnzB=1024 +tid=0, i,j = 716,859 nnzA= 1024, nnzB=1024 +tid=0, i,j = 623,853 nnzA= 1024, nnzB=1024 +tid=0, i,j = 218,836 nnzA= 1024, nnzB=1024 +tid=0, i,j = 508,859 nnzA= 1024, nnzB=1024 +tid=0, i,j = 541,839 nnzA= 1024, nnzB=1024 +tid=0, i,j = 114,844 nnzA= 1024, nnzB=1024 +tid=0, i,j = 464,849 nnzA= 1024, nnzB=1024 +tid=0, i,j = 636,862 nnzA= 1024, nnzB=1024 +tid=0, i,j = 106,855 nnzA= 1024, nnzB=1024 +tid=0, i,j = 150,852 nnzA= 1024, nnzB=1024 +tid=0, i,j = 711,839 nnzA= 1024, nnzB=1024 +tid=0, i,j = 21,863 nnzA= 1024, nnzB=1024 +tid=0, i,j = 650,857 nnzA= 1024, nnzB=1024 +tid=0, i,j = 151,857 nnzA= 1024, nnzB=1024 +tid=0, i,j = 48,867 nnzA= 1024, nnzB=1024 +tid=0, i,j = 990,877 nnzA= 1024, nnzB=1024 +tid=0, i,j = 176,880 nnzA= 1024, nnzB=1024 +tid=0, i,j = 775,872 nnzA= 1024, nnzB=1024 +tid=0, i,j = 687,900 nnzA= 1024, nnzB=1024 +tid=0, i,j = 357,876 nnzA= 1024, nnzB=1024 +tid=0, i,j = 929,883 nnzA= 1024, nnzB=1024 +tid=0, i,j = 42,897 nnzA= 1024, nnzB=1024 +tid=0, i,j = 722,877 nnzA= 1024, nnzB=1024 +tid=0, i,j = 608,882 nnzA= 1024, nnzB=1024 +tid=0, i,j = 653,892 nnzA= 1024, nnzB=1024 +tid=0, i,j = 173,873 nnzA= 1024, nnzB=1024 +tid=0, i,j = 679,891 nnzA= 1024, nnzB=1024 +tid=0, i,j = 453,871 nnzA= 1024, nnzB=1024 +tid=0, i,j = 943,888 nnzA= 1024, nnzB=1024 +tid=0, i,j = 506,870 nnzA= 1024, nnzB=1024 +tid=0, i,j = 37,895 nnzA= 1024, nnzB=1024 +tid=0, i,j = 485,874 nnzA= 1024, nnzB=1024 +tid=0, i,j = 487,887 nnzA= 1024, nnzB=1024 +tid=0, i,j = 687,885 nnzA= 1024, nnzB=1024 +tid=0, i,j = 97,866 nnzA= 1024, nnzB=1024 +tid=0, i,j = 168,875 nnzA= 1024, nnzB=1024 +tid=0, i,j = 110,888 nnzA= 1024, nnzB=1024 +tid=0, i,j = 33,894 
nnzA= 1024, nnzB=1024 +tid=0, i,j = 695,895 nnzA= 1024, nnzB=1024 +tid=0, i,j = 303,868 nnzA= 1024, nnzB=1024 +tid=0, i,j = 23,881 nnzA= 1024, nnzB=1024 +tid=0, i,j = 390,896 nnzA= 1024, nnzB=1024 +tid=0, i,j = 364,869 nnzA= 1024, nnzB=1024 +tid=0, i,j = 628,890 nnzA= 1024, nnzB=1024 +tid=0, i,j = 643,884 nnzA= 1024, nnzB=1024 +tid=0, i,j = 892,889 nnzA= 1024, nnzB=1024 +tid=0, i,j = 605,901 nnzA= 1024, nnzB=1024 +tid=0, i,j = 502,915 nnzA= 1024, nnzB=1024 +tid=0, i,j = 366,915 nnzA= 1024, nnzB=1024 +tid=0, i,j = 548,911 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,906 nnzA= 1024, nnzB=1024 +tid=0, i,j = 823,916 nnzA= 1024, nnzB=1024 +tid=0, i,j = 496,939 nnzA= 1024, nnzB=1024 +tid=0, i,j = 215,938 nnzA= 1024, nnzB=1024 +tid=0, i,j = 480,918 nnzA= 1024, nnzB=1024 +tid=0, i,j = 924,914 nnzA= 1024, nnzB=1024 +tid=0, i,j = 674,907 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,931 nnzA= 1024, nnzB=1024 +tid=0, i,j = 13,930 nnzA= 1024, nnzB=1024 +tid=0, i,j = 795,905 nnzA= 1024, nnzB=1024 +tid=0, i,j = 165,934 nnzA= 1024, nnzB=1024 +tid=0, i,j = 685,925 nnzA= 1024, nnzB=1024 +tid=0, i,j = 608,920 nnzA= 1024, nnzB=1024 +tid=0, i,j = 456,909 nnzA= 1024, nnzB=1024 +tid=0, i,j = 966,921 nnzA= 1024, nnzB=1024 +tid=0, i,j = 216,923 nnzA= 1024, nnzB=1024 +tid=0, i,j = 808,904 nnzA= 1024, nnzB=1024 +tid=0, i,j = 146,901 nnzA= 1024, nnzB=1024 +tid=0, i,j = 167,910 nnzA= 1024, nnzB=1024 +tid=0, i,j = 860,933 nnzA= 1024, nnzB=1024 +tid=0, i,j = 399,936 nnzA= 1024, nnzB=1024 +tid=0, i,j = 351,935 nnzA= 1024, nnzB=1024 +tid=0, i,j = 420,916 nnzA= 1024, nnzB=1024 +tid=0, i,j = 57,902 nnzA= 1024, nnzB=1024 +tid=0, i,j = 970,919 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1021,903 nnzA= 1024, nnzB=1024 +tid=0, i,j = 755,926 nnzA= 1024, nnzB=1024 +tid=0, i,j = 538,929 nnzA= 1024, nnzB=1024 +tid=0, i,j = 414,940 nnzA= 1024, nnzB=1024 +tid=0, i,j = 221,951 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1017,950 nnzA= 1024, nnzB=1024 +tid=0, i,j = 628,948 nnzA= 1024, nnzB=1024 +tid=0, i,j = 109,944 nnzA= 1024, 
nnzB=1024 +tid=0, i,j = 653,956 nnzA= 1024, nnzB=1024 +tid=0, i,j = 316,949 nnzA= 1024, nnzB=1024 +tid=0, i,j = 875,971 nnzA= 1024, nnzB=1024 +tid=0, i,j = 823,955 nnzA= 1024, nnzB=1024 +tid=0, i,j = 671,945 nnzA= 1024, nnzB=1024 +tid=0, i,j = 856,970 nnzA= 1024, nnzB=1024 +tid=0, i,j = 538,943 nnzA= 1024, nnzB=1024 +tid=0, i,j = 370,964 nnzA= 1024, nnzB=1024 +tid=0, i,j = 504,965 nnzA= 1024, nnzB=1024 +tid=0, i,j = 644,958 nnzA= 1024, nnzB=1024 +tid=0, i,j = 667,959 nnzA= 1024, nnzB=1024 +tid=0, i,j = 828,961 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1023,967 nnzA= 1024, nnzB=1024 +tid=0, i,j = 748,939 nnzA= 1024, nnzB=1024 +tid=0, i,j = 246,946 nnzA= 1024, nnzB=1024 +tid=0, i,j = 78,960 nnzA= 1024, nnzB=1024 +tid=0, i,j = 31,943 nnzA= 1024, nnzB=1024 +tid=0, i,j = 564,969 nnzA= 1024, nnzB=1024 +tid=0, i,j = 400,968 nnzA= 1024, nnzB=1024 +tid=0, i,j = 182,947 nnzA= 1024, nnzB=1024 +tid=0, i,j = 483,966 nnzA= 1024, nnzB=1024 +tid=0, i,j = 656,957 nnzA= 1024, nnzB=1024 +tid=0, i,j = 457,955 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,962 nnzA= 1024, nnzB=1024 +tid=0, i,j = 356,942 nnzA= 1024, nnzB=1024 +tid=0, i,j = 586,941 nnzA= 1024, nnzB=1024 +tid=0, i,j = 397,963 nnzA= 1024, nnzB=1024 +tid=0, i,j = 549,972 nnzA= 1024, nnzB=1024 +tid=0, i,j = 72,985 nnzA= 1024, nnzB=1024 +tid=0, i,j = 582,984 nnzA= 1024, nnzB=1024 +tid=0, i,j = 743,985 nnzA= 1024, nnzB=1024 +tid=0, i,j = 340,987 nnzA= 1024, nnzB=1024 +tid=0, i,j = 123,978 nnzA= 1024, nnzB=1024 +tid=0, i,j = 449,984 nnzA= 1024, nnzB=1024 +tid=0, i,j = 120,987 nnzA= 1024, nnzB=1024 +tid=0, i,j = 371,980 nnzA= 1024, nnzB=1024 +tid=0, i,j = 980,976 nnzA= 1024, nnzB=1024 +tid=0, i,j = 58,983 nnzA= 1024, nnzB=1024 +tid=0, i,j = 323,986 nnzA= 1024, nnzB=1024 +tid=0, i,j = 175,981 nnzA= 1024, nnzB=1024 +tid=0, i,j = 630,972 nnzA= 1024, nnzB=1024 +tid=0, i,j = 347,977 nnzA= 1024, nnzB=1024 +tid=0, i,j = 934,974 nnzA= 1024, nnzB=1024 +tid=0, i,j = 170,995 nnzA= 1024, nnzB=1024 +tid=0, i,j = 660,991 nnzA= 1024, nnzB=1024 +tid=0, i,j 
= 172,988 nnzA= 1024, nnzB=1024 +tid=0, i,j = 67,1020 nnzA= 1024, nnzB=1024 +tid=0, i,j = 514,990 nnzA= 1024, nnzB=1024 +tid=0, i,j = 946,997 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1022,1006 nnzA= 1024, nnzB=1024 +tid=0, i,j = 287,1019 nnzA= 1024, nnzB=1024 +tid=0, i,j = 678,998 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,1001 nnzA= 1024, nnzB=1024 +tid=0, i,j = 491,1022 nnzA= 1024, nnzB=1024 +tid=0, i,j = 167,999 nnzA= 1024, nnzB=1024 +tid=0, i,j = 585,989 nnzA= 1024, nnzB=1024 +tid=0, i,j = 250,1004 nnzA= 1024, nnzB=1024 +tid=0, i,j = 87,993 nnzA= 1024, nnzB=1024 +tid=0, i,j = 151,1020 nnzA= 1024, nnzB=1024 +tid=0, i,j = 180,1017 nnzA= 1024, nnzB=1024 +tid=0, i,j = 254,1018 nnzA= 1024, nnzB=1024 +tid=0, i,j = 667,1014 nnzA= 1024, nnzB=1024 +tid=0, i,j = 992,994 nnzA= 1024, nnzB=1024 +tid=0, i,j = 531,992 nnzA= 1024, nnzB=1024 +tid=0, i,j = 286,1002 nnzA= 1024, nnzB=1024 +tid=0, i,j = 15,992 nnzA= 1024, nnzB=1024 +tid=0, i,j = 674,993 nnzA= 1024, nnzB=1024 +tid=0, i,j = 127,1015 nnzA= 1024, nnzB=1024 +tid=0, i,j = 810,1021 nnzA= 1024, nnzB=1024 +tid=0, i,j = 533,1012 nnzA= 1024, nnzB=1024 +tid=0, i,j = 457,1016 nnzA= 1024, nnzB=1024 +tid=0, i,j = 159,1008 nnzA= 1024, nnzB=1024 +tid=0, i,j = 613,1015 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,1013 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,1009 nnzA= 1024, nnzB=1024 +tid=0, i,j = 840,1023 nnzA= 1024, nnzB=1024 +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. 
+returned from kernel 9.10131ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + + (0,478) 268 + (0,574) 246 + (2,376) 235 + (5,560) 278 + (6,996) 255 + (7,183) 256 + (7,666) 248 + (8,896) 255 + (9,187) 274 + (10,446) 256 + (11,46) 270 + (11,955) 284 + (12,397) 250 + (12,953) 259 + (13,192) 278 + (14,421) 267 + (15,568) 251 + (16,788) 225 + (16,904) 246 + (17,928) 240 + (18,103) 262 + (19,821) 235 + (19,886) 236 + (20,474) 267 + (21,479) 248 + (21,975) 251 + (22,569) 255 + (23,310) 272 + (24,905) 262 + ... +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... 
+ + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + + (0,478) 268 + (0,574) 246 + (2,376) 235 + (5,560) 278 + (6,996) 255 + (7,183) 256 + (7,666) 248 + (8,896) 255 + (9,187) 274 + (10,446) 256 + (11,46) 270 + (11,955) 284 + (12,397) 250 + (12,953) 259 + (13,192) 278 + (14,421) 267 + (15,568) 251 + (16,788) 225 + (16,904) 246 + (17,928) 240 + (18,103) 262 + (19,821) 235 + (19,886) 236 + (20,474) 267 + (21,479) 248 + (21,975) 251 + (22,569) 255 + (23,310) 272 + (24,905) 262 + (25,241) 225 + (26,428) 224 + (28,107) 228 + (28,441) 274 + (30,694) 269 + (32,121) 239 + (33,81) 249 + (34,804) 269 + (36,451) 264 + (37,609) 263 + (38,138) 258 + (39,698) 263 + (40,950) 236 + (41,568) 236 + (42,324) 238 + (43,798) 244 + (46,208) 240 + (47,70) 264 + (48,336) 277 + (49,476) 254 + (50,35) 242 + (51,556) 265 + (52,999) 247 + (53,940) 264 + (54,558) 257 + (54,960) 259 + (55,979) 251 + (56,90) 305 + (57,846) 275 + (57,893) 272 + (58,35) 260 + (59,108) 255 + (60,479) 255 + (61,590) 264 + (62,771) 259 + (63,50) 267 + (64,268) 276 + (65,694) 249 + (66,719) 261 + (67,411) 239 + (68,324) 246 + (69,477) 254 + (70,539) 241 + (71,228) 235 + (72,297) 242 + (73,665) 269 + (75,855) 227 + (76,248) 235 + (77,433) 251 + (78,90) 275 + (81,754) 270 + (82,243) 286 + (84,253) 267 + (86,104) 247 + (87,657) 255 + (89,825) 251 + (90,37) 248 + (91,234) 259 + (91,519) 276 + (92,74) 259 + (92,218) 266 + (92,690) 256 + (93,486) 268 + (94,637) 277 + (94,722) 261 + (96,564) 282 + (97,748) 245 + (99,326) 249 + (100,281) 248 + (102,609) 258 + (103,621) 277 + (104,644) 226 + (106,652) 244 + (107,239) 273 + (107,522) 234 + (108,131) 274 + (109,884) 253 + (110,402) 251 + (111,905) 256 + (112,127) 241 + (112,779) 239 + (113,278) 251 + (114,519) 264 + (115,240) 262 + (116,198) 258 + (117,219) 230 + (117,338) 251 + (118,99) 260 + (120,477) 266 + (121,554) 271 + (121,715) 291 + (122,151) 253 + (123,621) 252 + (125,177) 236 + (126,36) 275 + 
(128,820) 263 + (128,835) 248 + (129,660) 255 + (130,623) 246 + (130,807) 273 + (131,253) 271 + (131,355) 260 + (132,570) 264 + (133,492) 278 + (134,821) 268 + (135,295) 266 + (136,108) 263 + (137,834) 271 + (138,288) 253 + (139,284) 249 + (139,945) 286 + (140,887) 265 + (141,199) 274 + (142,87) 235 + (142,225) 261 + (143,123) 258 + (144,574) 262 + (145,552) 250 + (146,194) 244 + (146,995) 255 + (148,357) 253 + (149,949) 253 + (150,717) 255 + (151,484) 272 + (156,290) 250 + (157,714) 302 + (157,974) 274 + (158,959) 228 + (160,297) 252 + (162,601) 264 + (163,816) 271 + (164,221) 254 + (165,396) 243 + (166,801) 242 + (167,879) 234 + (168,321) 273 + (169,901) 286 + (170,612) 282 + (171,15) 253 + (172,951) 261 + (174,0) 258 + (174,595) 259 + (175,669) 254 + (176,108) 261 + (176,188) 279 + (176,614) 269 + (176,781) 255 + (177,17) 261 + (178,631) 265 + (179,932) 225 + (180,830) 258 + (182,675) 259 + (182,1001) 257 + (183,692) 240 + (184,143) 247 + (185,450) 240 + (186,779) 270 + (187,997) 256 + (188,357) 265 + (189,111) 250 + (190,990) 262 + (192,644) 269 + (192,953) 250 + (193,135) 246 + (194,137) 267 + (195,922) 276 + (197,859) 269 + (198,910) 239 + (199,531) 270 + (201,907) 253 + (202,863) 255 + (203,865) 232 + (204,614) 268 + (207,826) 239 + (208,985) 262 + (209,808) 256 + (210,659) 250 + (211,71) 236 + (211,931) 266 + (212,426) 291 + (213,152) 255 + (214,928) 264 + (215,268) 270 + (216,550) 268 + (217,921) 252 + (218,704) 246 + (218,922) 265 + (219,66) 232 + (220,704) 235 + (221,56) 280 + (221,551) 273 + (222,545) 243 + (223,1016) 249 + (224,721) 261 + (225,935) 270 + (226,727) 254 + (228,743) 240 + (229,535) 242 + (230,382) 245 + (231,551) 260 + (232,897) 273 + (233,570) 235 + (234,520) 246 + (235,522) 261 + (236,221) 244 + (237,755) 271 + (238,964) 243 + (239,82) 243 + (240,388) 238 + (241,500) 276 + (242,124) 240 + (242,193) 243 + (242,621) 243 + (243,300) 254 + (244,588) 256 + (244,1004) 265 + (245,494) 253 + (246,326) 262 + (247,115) 263 + (247,147) 263 + 
(248,233) 224 + (250,485) 259 + (251,708) 262 + (252,197) 237 + (253,485) 256 + (254,40) 243 + (254,238) 261 + (255,895) 243 + (256,114) 268 + (257,461) 250 + (257,796) 237 + (258,233) 236 + (260,884) 257 + (261,945) 279 + (262,368) 260 + (264,755) 251 + (265,124) 253 + (266,352) 255 + (267,10) 238 + (268,234) 248 + (269,400) 248 + (270,877) 259 + (270,924) 231 + (271,944) 245 + (272,67) 253 + (273,100) 273 + (274,979) 284 + (276,333) 258 + (277,377) 245 + (279,877) 252 + (280,18) 242 + (281,449) 240 + (282,179) 259 + (283,1007) 244 + (284,595) 271 + (285,32) 231 + (286,37) 245 + (287,126) 299 + (287,394) 257 + (288,848) 267 + (290,317) 257 + (291,594) 264 + (292,562) 257 + (294,466) 265 + (294,960) 262 + (295,1) 245 + (295,106) 252 + (296,109) 245 + (296,183) 243 + (296,245) 238 + (297,912) 281 + (297,1006) 269 + (299,159) 271 + (300,554) 260 + (301,774) 240 + (302,30) 273 + (303,645) 243 + (304,229) 263 + (305,622) 282 + (307,264) 267 + (308,28) 241 + (309,328) 249 + (309,627) 280 + (310,357) 234 + (311,355) 243 + (312,61) 239 + (313,758) 265 + (314,571) 268 + (315,177) 236 + (315,298) 244 + (315,741) 236 + (316,177) 226 + (316,308) 279 + (317,323) 245 + (318,595) 288 + (319,126) 281 + (320,468) 260 + (321,73) 267 + (322,235) 246 + (323,375) 233 + (323,651) 255 + (324,549) 239 + (325,306) 246 + (325,487) 279 + (326,649) 272 + (327,704) 246 + (328,142) 271 + (329,176) 257 + (330,848) 249 + (330,965) 244 + (332,795) 265 + (334,695) 275 + (335,694) 236 + (336,775) 251 + (336,808) 231 + (337,608) 236 + (338,993) 243 + (339,680) 277 + (340,849) 251 + (341,36) 273 + (342,723) 252 + (343,678) 235 + (344,384) 255 + (344,680) 248 + (345,75) 252 + (347,996) 264 + (348,60) 280 + (348,821) 297 + (349,804) 265 + (350,282) 254 + (351,142) 272 + (351,937) 275 + (352,160) 256 + (353,536) 260 + (355,352) 264 + (356,340) 243 + (358,678) 257 + (360,679) 276 + (361,794) 255 + (361,989) 264 + (362,816) 295 + (363,206) 250 + (364,629) 267 + (365,990) 269 + (366,841) 262 + (366,971) 
261 + (367,888) 315 + (368,587) 245 + (369,684) 261 + (370,270) 253 + (371,327) 257 + (372,471) 258 + (373,88) 246 + (374,669) 242 + (375,992) 241 + (376,336) 259 + (377,86) 292 + (378,882) 270 + (379,592) 264 + (380,77) 258 + (380,643) 240 + (381,1012) 255 + (382,816) 253 + (383,711) 240 + (385,670) 249 + (386,537) 255 + (387,347) 240 + (388,494) 268 + (389,328) 235 + (389,733) 237 + (390,551) 269 + (391,59) 254 + (391,600) 270 + (394,692) 247 + (396,645) 233 + (397,835) 259 + (398,107) 261 + (398,246) 264 + (399,436) 267 + (400,172) 260 + (400,382) 240 + (401,790) 245 + (402,320) 258 + (403,40) 257 + (404,641) 250 + (405,49) 269 + (405,475) 257 + (407,320) 277 + (408,61) 253 + (410,754) 239 + (411,643) 269 + (412,949) 260 + (413,94) 254 + (414,991) 257 + (415,26) 244 + (416,575) 254 + (417,366) 232 + (418,160) 258 + (418,669) 266 + (419,209) 252 + (419,285) 266 + (420,748) 277 + (421,614) 258 + (422,177) 217 + (423,873) 251 + (424,542) 258 + (425,263) 247 + (426,377) 261 + (427,149) 236 + (428,950) 246 + (429,305) 277 + (430,718) 237 + (431,51) 246 + (432,857) 246 + (434,604) 248 + (435,152) 248 + (436,356) 286 + (437,105) 235 + (438,814) 254 + (440,338) 251 + (441,982) 259 + (442,880) 244 + (443,753) 273 + (444,669) 240 + (445,952) 236 + (446,741) 264 + (447,970) 247 + (448,646) 244 + (448,744) 237 + (449,835) 286 + (450,579) 241 + (451,147) 258 + (451,1017) 257 + (452,868) 247 + (453,26) 262 + (454,415) 236 + (454,668) 249 + (455,43) 247 + (456,849) 270 + (456,985) 251 + (457,218) 266 + (458,510) 282 + (459,737) 250 + (460,836) 269 + (461,849) 263 + (461,917) 270 + (462,900) 262 + (463,316) 256 + (464,762) 250 + (465,355) 262 + (465,801) 254 + (466,673) 247 + (467,112) 260 + (468,288) 261 + (470,889) 248 + (471,650) 269 + (473,121) 239 + (473,127) 251 + (474,487) 265 + (475,382) 218 + (476,44) 258 + (477,342) 257 + (478,786) 267 + (480,667) 244 + (481,558) 252 + (482,680) 224 + (483,517) 270 + (484,961) 276 + (485,274) 249 + (486,1015) 262 + (487,194) 241 + 
(489,802) 252 + (490,811) 260 + (491,319) 254 + (492,377) 242 + (494,432) 207 + (495,809) 292 + (496,267) 255 + (496,902) 247 + (498,194) 244 + (499,952) 273 + (500,84) 259 + (501,704) 233 + (503,519) 278 + (504,510) 264 + (504,887) 262 + (505,574) 285 + (507,643) 259 + (508,449) 241 + (512,892) 253 + (513,271) 242 + (514,404) 276 + (515,758) 263 + (517,369) 271 + (518,293) 245 + (519,786) 261 + (520,270) 256 + (521,1013) 259 + (522,284) 262 + (523,632) 265 + (524,945) 273 + (525,94) 249 + (525,362) 257 + (526,52) 282 + (527,61) 242 + (528,294) 274 + (529,145) 248 + (529,998) 261 + (530,112) 253 + (531,908) 249 + (533,674) 252 + (534,505) 227 + (535,660) 261 + (535,776) 265 + (536,500) 274 + (537,799) 258 + (538,492) 241 + (538,861) 258 + (540,245) 272 + (542,137) 268 + (545,658) 246 + (546,213) 272 + (547,767) 255 + (547,912) 279 + (547,1018) 252 + (548,46) 261 + (548,697) 265 + (549,602) 257 + (550,927) 277 + (552,710) 271 + (553,391) 244 + (554,351) 227 + (555,10) 235 + (556,26) 238 + (557,910) 255 + (558,552) 261 + (560,792) 265 + (561,597) 257 + (562,182) 264 + (562,862) 261 + (563,877) 276 + (564,310) 259 + (564,609) 251 + (565,490) 251 + (566,564) 263 + (566,607) 251 + (569,872) 279 + (570,465) 263 + (571,271) 271 + (571,919) 243 + (572,630) 237 + (574,603) 272 + (576,256) 284 + (579,274) 236 + (580,182) 252 + (581,445) 251 + (582,177) 196 + (583,118) 280 + (584,399) 250 + (585,433) 244 + (587,254) 237 + (588,914) 254 + (589,1016) 269 + (590,95) 277 + (590,802) 279 + (591,978) 265 + (592,527) 245 + (593,143) 276 + (594,430) 232 + (595,787) 261 + (596,677) 247 + (598,788) 250 + (599,127) 228 + (600,339) 249 + (601,478) 271 + (602,218) 271 + (603,759) 242 + (604,270) 247 + (605,76) 243 + (606,930) 257 + (608,832) 267 + (609,287) 265 + (610,794) 256 + (611,759) 247 + (612,1006) 282 + (613,398) 239 + (614,386) 259 + (615,115) 264 + (616,928) 254 + (617,30) 260 + (618,361) 243 + (619,996) 222 + (620,5) 248 + (620,337) 256 + (621,41) 251 + (623,44) 267 + (623,79) 
252 + (623,966) 263 + (624,19) 270 + (624,242) 258 + (624,524) 244 + (625,683) 288 + (626,51) 242 + (627,361) 257 + (628,396) 248 + (629,882) 260 + (630,341) 237 + (631,49) 238 + (631,585) 234 + (632,73) 268 + (634,912) 278 + (635,882) 266 + (636,617) 252 + (637,285) 251 + (637,716) 275 + (638,113) 274 + (638,367) 254 + (639,616) 258 + (640,837) 234 + (641,457) 251 + (643,934) 265 + (647,783) 240 + (648,195) 270 + (649,614) 239 + (650,957) 265 + (651,281) 252 + (652,973) 267 + (653,60) 249 + (653,333) 268 + (654,605) 272 + (655,910) 234 + (656,349) 255 + (659,17) 250 + (660,591) 275 + (661,512) 277 + (663,767) 258 + (664,158) 224 + (665,77) 239 + (666,503) 248 + (667,951) 261 + (668,365) 278 + (669,300) 273 + (671,141) 272 + (671,565) 285 + (672,819) 223 + (674,36) 249 + (674,819) 249 + (675,454) 234 + (676,242) 263 + (677,289) 278 + (677,647) 255 + (678,802) 240 + (679,899) 242 + (680,398) 266 + (681,390) 266 + (681,699) 233 + (682,117) 246 + (683,110) 265 + (684,907) 243 + (685,17) 239 + (686,202) 255 + (687,45) 222 + (688,287) 242 + (689,502) 257 + (690,299) 252 + (691,392) 256 + (692,600) 264 + (694,378) 243 + (695,702) 271 + (696,102) 251 + (698,631) 252 + (699,152) 272 + (700,840) 267 + (701,323) 239 + (702,777) 232 + (703,132) 264 + (704,374) 261 + (705,579) 254 + (706,511) 233 + (707,76) 261 + (708,259) 269 + (708,925) 266 + (709,872) 269 + (709,873) 265 + (710,107) 235 + (710,293) 266 + (711,210) 257 + (711,462) 267 + (714,475) 245 + (715,172) 253 + (715,751) 241 + (716,697) 249 + (717,234) 239 + (717,620) 244 + (718,848) 260 + (719,331) 265 + (720,201) 255 + (720,725) 272 + (721,806) 262 + (722,415) 239 + (722,934) 262 + (723,675) 249 + (724,480) 259 + (726,337) 259 + (727,177) 237 + (728,797) 272 + (729,884) 241 + (730,767) 249 + (731,275) 275 + (732,910) 231 + (733,763) 283 + (734,574) 263 + (735,268) 253 + (736,115) 218 + (737,146) 238 + (737,912) 249 + (738,1023) 252 + (739,335) 259 + (740,596) 233 + (741,365) 270 + (741,630) 256 + (742,485) 250 + 
(743,186) 252 + (745,621) 250 + (745,645) 246 + (746,273) 276 + (747,91) 256 + (748,886) 245 + (749,59) 273 + (749,755) 254 + (751,348) 253 + (752,313) 255 + (752,742) 277 + (752,745) 260 + (753,472) 260 + (753,592) 249 + (754,1007) 234 + (756,633) 255 + (758,847) 268 + (759,500) 253 + (760,340) 251 + (760,381) 270 + (762,962) 270 + (763,954) 236 + (764,392) 236 + (764,913) 258 + (766,915) 265 + (766,936) 259 + (767,372) 266 + (768,307) 266 + (770,458) 265 + (771,103) 241 + (771,487) 264 + (773,56) 248 + (774,773) 259 + (775,115) 266 + (776,537) 254 + (777,392) 258 + (778,893) 287 + (779,644) 270 + (780,256) 263 + (781,899) 261 + (782,399) 251 + (782,892) 277 + (783,614) 237 + (784,54) 231 + (785,816) 261 + (786,462) 248 + (787,876) 262 + (788,273) 276 + (789,696) 260 + (790,471) 251 + (791,793) 261 + (792,636) 264 + (792,955) 263 + (793,809) 269 + (794,986) 249 + (795,656) 253 + (796,347) 246 + (797,880) 264 + (798,802) 256 + (799,294) 267 + (800,970) 231 + (801,130) 244 + (803,896) 256 + (804,1022) 257 + (805,32) 232 + (805,479) 257 + (806,889) 245 + (807,504) 251 + (809,719) 272 + (809,737) 270 + (810,646) 241 + (811,547) 238 + (812,375) 262 + (813,200) 257 + (815,408) 252 + (816,902) 256 + (817,430) 241 + (818,985) 256 + (819,688) 254 + (821,839) 257 + (822,747) 262 + (823,39) 259 + (824,886) 241 + (825,406) 247 + (826,814) 242 + (827,625) 266 + (828,407) 260 + (829,511) 254 + (830,915) 263 + (831,982) 266 + (832,1003) 246 + (833,362) 259 + (833,999) 258 + (834,136) 263 + (834,295) 267 + (835,115) 281 + (836,218) 272 + (837,565) 285 + (839,541) 280 + (839,711) 273 + (840,159) 251 + (841,103) 240 + (841,636) 271 + (842,136) 257 + (843,524) 254 + (844,114) 260 + (845,694) 268 + (846,533) 274 + (847,741) 243 + (848,483) 269 + (849,464) 257 + (850,302) 245 + (851,567) 248 + (852,150) 262 + (852,529) 258 + (853,623) 234 + (855,106) 265 + (856,1014) 261 + (857,151) 270 + (857,650) 280 + (858,781) 242 + (858,994) 242 + (859,508) 255 + (859,716) 284 + (862,636) 241 + 
(863,21) 242 + (864,1022) 242 + (865,972) 264 + (866,97) 243 + (867,48) 235 + (868,303) 249 + (869,364) 255 + (870,506) 241 + (871,453) 255 + (872,775) 259 + (873,173) 269 + (874,485) 249 + (875,168) 249 + (876,357) 243 + (877,722) 255 + (877,990) 267 + (880,176) 291 + (881,23) 268 + (882,608) 248 + (883,929) 251 + (884,643) 247 + (885,687) 259 + (887,487) 257 + (888,110) 266 + (888,943) 264 + (889,892) 267 + (890,628) 261 + (891,679) 258 + (892,653) 254 + (894,33) 258 + (895,37) 266 + (895,695) 269 + (896,390) 269 + (897,42) 265 + (900,687) 281 + (901,146) 241 + (901,605) 261 + (902,57) 230 + (903,1021) 250 + (904,808) 237 + (905,795) 271 + (906,479) 257 + (907,674) 277 + (909,456) 250 + (910,167) 265 + (911,548) 248 + (914,924) 250 + (915,366) 253 + (915,502) 238 + (916,420) 273 + (916,823) 247 + (918,480) 248 + (919,970) 259 + (920,608) 246 + (921,966) 230 + (923,216) 247 + (925,685) 275 + (926,755) 274 + (929,538) 268 + (930,13) 259 + (931,479) 250 + (933,860) 261 + (934,165) 250 + (935,351) 233 + (936,399) 244 + (938,215) 264 + (939,496) 276 + (939,748) 262 + (940,414) 242 + (941,586) 265 + (942,356) 274 + (943,31) 263 + (943,538) 262 + (944,109) 249 + (945,671) 258 + (946,246) 255 + (947,182) 262 + (948,628) 262 + (949,316) 238 + (950,1017) 259 + (951,221) 250 + (955,457) 237 + (955,823) 241 + (956,653) 258 + (957,656) 255 + (958,644) 238 + (959,667) 246 + (960,78) 247 + (961,828) 252 + (962,877) 269 + (963,397) 284 + (964,370) 262 + (965,504) 244 + (966,483) 246 + (967,1023) 246 + (968,400) 233 + (969,564) 254 + (970,856) 257 + (971,875) 243 + (972,549) 259 + (972,630) 240 + (974,934) 281 + (976,980) 247 + (977,347) 230 + (978,123) 258 + (980,371) 245 + (981,175) 258 + (983,58) 252 + (984,449) 248 + (984,582) 246 + (985,72) 253 + (985,743) 237 + (986,323) 248 + (987,120) 241 + (987,340) 266 + (988,172) 251 + (989,585) 241 + (990,514) 271 + (991,660) 256 + (992,15) 283 + (992,531) 277 + (993,87) 267 + (993,674) 252 + (994,992) 244 + (995,170) 269 + (997,946) 
270 + (998,678) 251 + (999,167) 258 + (1001,877) 250 + (1002,286) 242 + (1004,250) 259 + (1006,1022) 248 + (1008,159) 264 + (1009,574) 258 + (1012,533) 270 + (1013,574) 273 + (1014,667) 247 + (1015,127) 244 + (1015,613) 245 + (1016,457) 246 + (1017,180) 267 + (1018,254) 237 + (1019,287) 248 + (1020,67) 261 + (1020,151) 248 + (1021,810) 239 + (1022,491) 268 + (1023,840) 264 + + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + C_actual, 1024 entries, memory: 40.2 KB + + (0,478) 268 + (0,574) 246 + (2,376) 235 + (5,560) 278 + (6,996) 255 + (7,183) 256 + (7,666) 248 + (8,896) 255 + (9,187) 274 + (10,446) 256 + (11,46) 270 + (11,955) 284 + (12,397) 250 + (12,953) 259 + (13,192) 278 + (14,421) 267 + (15,568) 251 + (16,788) 225 + (16,904) 246 + (17,928) 240 + (18,103) 262 + (19,821) 235 + (19,886) 236 + (20,474) 267 + (21,479) 248 + (21,975) 251 + (22,569) 255 + (23,310) 272 + (24,905) 262 + (25,241) 225 + (26,428) 224 + (28,107) 228 + (28,441) 274 + (30,694) 269 + (32,121) 239 + (33,81) 249 + (34,804) 269 + (36,451) 264 + (37,609) 263 + (38,138) 258 + (39,698) 263 + (40,950) 236 + (41,568) 236 + (42,324) 238 + (43,798) 244 + (46,208) 240 + (47,70) 264 + (48,336) 277 + (49,476) 254 + (50,35) 242 + (51,556) 265 + (52,999) 247 + (53,940) 264 + (54,558) 257 + (54,960) 259 + (55,979) 251 + (56,90) 305 + (57,846) 275 + (57,893) 272 + (58,35) 260 + (59,108) 255 + (60,479) 255 + (61,590) 264 + (62,771) 259 + (63,50) 267 + (64,268) 276 + (65,694) 249 + (66,719) 261 + (67,411) 239 + (68,324) 246 + (69,477) 254 + (70,539) 241 + (71,228) 235 + (72,297) 242 + (73,665) 269 + (75,855) 227 + (76,248) 235 + (77,433) 251 + (78,90) 275 + (81,754) 270 + (82,243) 286 + (84,253) 267 + (86,104) 247 + (87,657) 255 + (89,825) 251 + (90,37) 248 + (91,234) 259 + (91,519) 276 + (92,74) 259 + (92,218) 266 + (92,690) 256 + (93,486) 268 + (94,637) 277 + (94,722) 261 + (96,564) 282 + (97,748) 245 + (99,326) 249 + (100,281) 248 + (102,609) 258 + (103,621) 277 + (104,644) 226 + (106,652) 244 + 
(107,239) 273 + (107,522) 234 + (108,131) 274 + (109,884) 253 + (110,402) 251 + (111,905) 256 + (112,127) 241 + (112,779) 239 + (113,278) 251 + (114,519) 264 + (115,240) 262 + (116,198) 258 + (117,219) 230 + (117,338) 251 + (118,99) 260 + (120,477) 266 + (121,554) 271 + (121,715) 291 + (122,151) 253 + (123,621) 252 + (125,177) 236 + (126,36) 275 + (128,820) 263 + (128,835) 248 + (129,660) 255 + (130,623) 246 + (130,807) 273 + (131,253) 271 + (131,355) 260 + (132,570) 264 + (133,492) 278 + (134,821) 268 + (135,295) 266 + (136,108) 263 + (137,834) 271 + (138,288) 253 + (139,284) 249 + (139,945) 286 + (140,887) 265 + (141,199) 274 + (142,87) 235 + (142,225) 261 + (143,123) 258 + (144,574) 262 + (145,552) 250 + (146,194) 244 + (146,995) 255 + (148,357) 253 + (149,949) 253 + (150,717) 255 + (151,484) 272 + (156,290) 250 + (157,714) 302 + (157,974) 274 + (158,959) 228 + (160,297) 252 + (162,601) 264 + (163,816) 271 + (164,221) 254 + (165,396) 243 + (166,801) 242 + (167,879) 234 + (168,321) 273 + (169,901) 286 + (170,612) 282 + (171,15) 253 + (172,951) 261 + (174,0) 258 + (174,595) 259 + (175,669) 254 + (176,108) 261 + (176,188) 279 + (176,614) 269 + (176,781) 255 + (177,17) 261 + (178,631) 265 + (179,932) 225 + (180,830) 258 + (182,675) 259 + (182,1001) 257 + (183,692) 240 + (184,143) 247 + (185,450) 240 + (186,779) 270 + (187,997) 256 + (188,357) 265 + (189,111) 250 + (190,990) 262 + (192,644) 269 + (192,953) 250 + (193,135) 246 + (194,137) 267 + (195,922) 276 + (197,859) 269 + (198,910) 239 + (199,531) 270 + (201,907) 253 + (202,863) 255 + (203,865) 232 + (204,614) 268 + (207,826) 239 + (208,985) 262 + (209,808) 256 + (210,659) 250 + (211,71) 236 + (211,931) 266 + (212,426) 291 + (213,152) 255 + (214,928) 264 + (215,268) 270 + (216,550) 268 + (217,921) 252 + (218,704) 246 + (218,922) 265 + (219,66) 232 + (220,704) 235 + (221,56) 280 + (221,551) 273 + (222,545) 243 + (223,1016) 249 + (224,721) 261 + (225,935) 270 + (226,727) 254 + (228,743) 240 + (229,535) 242 + 
(230,382) 245 + (231,551) 260 + (232,897) 273 + (233,570) 235 + (234,520) 246 + (235,522) 261 + (236,221) 244 + (237,755) 271 + (238,964) 243 + (239,82) 243 + (240,388) 238 + (241,500) 276 + (242,124) 240 + (242,193) 243 + (242,621) 243 + (243,300) 254 + (244,588) 256 + (244,1004) 265 + (245,494) 253 + (246,326) 262 + (247,115) 263 + (247,147) 263 + (248,233) 224 + (250,485) 259 + (251,708) 262 + (252,197) 237 + (253,485) 256 + (254,40) 243 + (254,238) 261 + (255,895) 243 + (256,114) 268 + (257,461) 250 + (257,796) 237 + (258,233) 236 + (260,884) 257 + (261,945) 279 + (262,368) 260 + (264,755) 251 + (265,124) 253 + (266,352) 255 + (267,10) 238 + (268,234) 248 + (269,400) 248 + (270,877) 259 + (270,924) 231 + (271,944) 245 + (272,67) 253 + (273,100) 273 + (274,979) 284 + (276,333) 258 + (277,377) 245 + (279,877) 252 + (280,18) 242 + (281,449) 240 + (282,179) 259 + (283,1007) 244 + (284,595) 271 + (285,32) 231 + (286,37) 245 + (287,126) 299 + (287,394) 257 + (288,848) 267 + (290,317) 257 + (291,594) 264 + (292,562) 257 + (294,466) 265 + (294,960) 262 + (295,1) 245 + (295,106) 252 + (296,109) 245 + (296,183) 243 + (296,245) 238 + (297,912) 281 + (297,1006) 269 + (299,159) 271 + (300,554) 260 + (301,774) 240 + (302,30) 273 + (303,645) 243 + (304,229) 263 + (305,622) 282 + (307,264) 267 + (308,28) 241 + (309,328) 249 + (309,627) 280 + (310,357) 234 + (311,355) 243 + (312,61) 239 + (313,758) 265 + (314,571) 268 + (315,177) 236 + (315,298) 244 + (315,741) 236 + (316,177) 226 + (316,308) 279 + (317,323) 245 + (318,595) 288 + (319,126) 281 + (320,468) 260 + (321,73) 267 + (322,235) 246 + (323,375) 233 + (323,651) 255 + (324,549) 239 + (325,306) 246 + (325,487) 279 + (326,649) 272 + (327,704) 246 + (328,142) 271 + (329,176) 257 + (330,848) 249 + (330,965) 244 + (332,795) 265 + (334,695) 275 + (335,694) 236 + (336,775) 251 + (336,808) 231 + (337,608) 236 + (338,993) 243 + (339,680) 277 + (340,849) 251 + (341,36) 273 + (342,723) 252 + (343,678) 235 + (344,384) 255 + (344,680) 
248 + (345,75) 252 + (347,996) 264 + (348,60) 280 + (348,821) 297 + (349,804) 265 + (350,282) 254 + (351,142) 272 + (351,937) 275 + (352,160) 256 + (353,536) 260 + (355,352) 264 + (356,340) 243 + (358,678) 257 + (360,679) 276 + (361,794) 255 + (361,989) 264 + (362,816) 295 + (363,206) 250 + (364,629) 267 + (365,990) 269 + (366,841) 262 + (366,971) 261 + (367,888) 315 + (368,587) 245 + (369,684) 261 + (370,270) 253 + (371,327) 257 + (372,471) 258 + (373,88) 246 + (374,669) 242 + (375,992) 241 + (376,336) 259 + (377,86) 292 + (378,882) 270 + (379,592) 264 + (380,77) 258 + (380,643) 240 + (381,1012) 255 + (382,816) 253 + (383,711) 240 + (385,670) 249 + (386,537) 255 + (387,347) 240 + (388,494) 268 + (389,328) 235 + (389,733) 237 + (390,551) 269 + (391,59) 254 + (391,600) 270 + (394,692) 247 + (396,645) 233 + (397,835) 259 + (398,107) 261 + (398,246) 264 + (399,436) 267 + (400,172) 260 + (400,382) 240 + (401,790) 245 + (402,320) 258 + (403,40) 257 + (404,641) 250 + (405,49) 269 + (405,475) 257 + (407,320) 277 + (408,61) 253 + (410,754) 239 + (411,643) 269 + (412,949) 260 + (413,94) 254 + (414,991) 257 + (415,26) 244 + (416,575) 254 + (417,366) 232 + (418,160) 258 + (418,669) 266 + (419,209) 252 + (419,285) 266 + (420,748) 277 + (421,614) 258 + (422,177) 217 + (423,873) 251 + (424,542) 258 + (425,263) 247 + (426,377) 261 + (427,149) 236 + (428,950) 246 + (429,305) 277 + (430,718) 237 + (431,51) 246 + (432,857) 246 + (434,604) 248 + (435,152) 248 + (436,356) 286 + (437,105) 235 + (438,814) 254 + (440,338) 251 + (441,982) 259 + (442,880) 244 + (443,753) 273 + (444,669) 240 + (445,952) 236 + (446,741) 264 + (447,970) 247 + (448,646) 244 + (448,744) 237 + (449,835) 286 + (450,579) 241 + (451,147) 258 + (451,1017) 257 + (452,868) 247 + (453,26) 262 + (454,415) 236 + (454,668) 249 + (455,43) 247 + (456,849) 270 + (456,985) 251 + (457,218) 266 + (458,510) 282 + (459,737) 250 + (460,836) 269 + (461,849) 263 + (461,917) 270 + (462,900) 262 + (463,316) 256 + (464,762) 250 + 
(465,355) 262 + (465,801) 254 + (466,673) 247 + (467,112) 260 + (468,288) 261 + (470,889) 248 + (471,650) 269 + (473,121) 239 + (473,127) 251 + (474,487) 265 + (475,382) 218 + (476,44) 258 + (477,342) 257 + (478,786) 267 + (480,667) 244 + (481,558) 252 + (482,680) 224 + (483,517) 270 + (484,961) 276 + (485,274) 249 + (486,1015) 262 + (487,194) 241 + (489,802) 252 + (490,811) 260 + (491,319) 254 + (492,377) 242 + (494,432) 207 + (495,809) 292 + (496,267) 255 + (496,902) 247 + (498,194) 244 + (499,952) 273 + (500,84) 259 + (501,704) 233 + (503,519) 278 + (504,510) 264 + (504,887) 262 + (505,574) 285 + (507,643) 259 + (508,449) 241 + (512,892) 253 + (513,271) 242 + (514,404) 276 + (515,758) 263 + (517,369) 271 + (518,293) 245 + (519,786) 261 + (520,270) 256 + (521,1013) 259 + (522,284) 262 + (523,632) 265 + (524,945) 273 + (525,94) 249 + (525,362) 257 + (526,52) 282 + (527,61) 242 + (528,294) 274 + (529,145) 248 + (529,998) 261 + (530,112) 253 + (531,908) 249 + (533,674) 252 + (534,505) 227 + (535,660) 261 + (535,776) 265 + (536,500) 274 + (537,799) 258 + (538,492) 241 + (538,861) 258 + (540,245) 272 + (542,137) 268 + (545,658) 246 + (546,213) 272 + (547,767) 255 + (547,912) 279 + (547,1018) 252 + (548,46) 261 + (548,697) 265 + (549,602) 257 + (550,927) 277 + (552,710) 271 + (553,391) 244 + (554,351) 227 + (555,10) 235 + (556,26) 238 + (557,910) 255 + (558,552) 261 + (560,792) 265 + (561,597) 257 + (562,182) 264 + (562,862) 261 + (563,877) 276 + (564,310) 259 + (564,609) 251 + (565,490) 251 + (566,564) 263 + (566,607) 251 + (569,872) 279 + (570,465) 263 + (571,271) 271 + (571,919) 243 + (572,630) 237 + (574,603) 272 + (576,256) 284 + (579,274) 236 + (580,182) 252 + (581,445) 251 + (582,177) 196 + (583,118) 280 + (584,399) 250 + (585,433) 244 + (587,254) 237 + (588,914) 254 + (589,1016) 269 + (590,95) 277 + (590,802) 279 + (591,978) 265 + (592,527) 245 + (593,143) 276 + (594,430) 232 + (595,787) 261 + (596,677) 247 + (598,788) 250 + (599,127) 228 + (600,339) 249 + 
(601,478) 271 + (602,218) 271 + (603,759) 242 + (604,270) 247 + (605,76) 243 + (606,930) 257 + (608,832) 267 + (609,287) 265 + (610,794) 256 + (611,759) 247 + (612,1006) 282 + (613,398) 239 + (614,386) 259 + (615,115) 264 + (616,928) 254 + (617,30) 260 + (618,361) 243 + (619,996) 222 + (620,5) 248 + (620,337) 256 + (621,41) 251 + (623,44) 267 + (623,79) 252 + (623,966) 263 + (624,19) 270 + (624,242) 258 + (624,524) 244 + (625,683) 288 + (626,51) 242 + (627,361) 257 + (628,396) 248 + (629,882) 260 + (630,341) 237 + (631,49) 238 + (631,585) 234 + (632,73) 268 + (634,912) 278 + (635,882) 266 + (636,617) 252 + (637,285) 251 + (637,716) 275 + (638,113) 274 + (638,367) 254 + (639,616) 258 + (640,837) 234 + (641,457) 251 + (643,934) 265 + (647,783) 240 + (648,195) 270 + (649,614) 239 + (650,957) 265 + (651,281) 252 + (652,973) 267 + (653,60) 249 + (653,333) 268 + (654,605) 272 + (655,910) 234 + (656,349) 255 + (659,17) 250 + (660,591) 275 + (661,512) 277 + (663,767) 258 + (664,158) 224 + (665,77) 239 + (666,503) 248 + (667,951) 261 + (668,365) 278 + (669,300) 273 + (671,141) 272 + (671,565) 285 + (672,819) 223 + (674,36) 249 + (674,819) 249 + (675,454) 234 + (676,242) 263 + (677,289) 278 + (677,647) 255 + (678,802) 240 + (679,899) 242 + (680,398) 266 + (681,390) 266 + (681,699) 233 + (682,117) 246 + (683,110) 265 + (684,907) 243 + (685,17) 239 + (686,202) 255 + (687,45) 222 + (688,287) 242 + (689,502) 257 + (690,299) 252 + (691,392) 256 + (692,600) 264 + (694,378) 243 + (695,702) 271 + (696,102) 251 + (698,631) 252 + (699,152) 272 + (700,840) 267 + (701,323) 239 + (702,777) 232 + (703,132) 264 + (704,374) 261 + (705,579) 254 + (706,511) 233 + (707,76) 261 + (708,259) 269 + (708,925) 266 + (709,872) 269 + (709,873) 265 + (710,107) 235 + (710,293) 266 + (711,210) 257 + (711,462) 267 + (714,475) 245 + (715,172) 253 + (715,751) 241 + (716,697) 249 + (717,234) 239 + (717,620) 244 + (718,848) 260 + (719,331) 265 + (720,201) 255 + (720,725) 272 + (721,806) 262 + (722,415) 239 + 
(722,934) 262 + (723,675) 249 + (724,480) 259 + (726,337) 259 + (727,177) 237 + (728,797) 272 + (729,884) 241 + (730,767) 249 + (731,275) 275 + (732,910) 231 + (733,763) 283 + (734,574) 263 + (735,268) 253 + (736,115) 218 + (737,146) 238 + (737,912) 249 + (738,1023) 252 + (739,335) 259 + (740,596) 233 + (741,365) 270 + (741,630) 256 + (742,485) 250 + (743,186) 252 + (745,621) 250 + (745,645) 246 + (746,273) 276 + (747,91) 256 + (748,886) 245 + (749,59) 273 + (749,755) 254 + (751,348) 253 + (752,313) 255 + (752,742) 277 + (752,745) 260 + (753,472) 260 + (753,592) 249 + (754,1007) 234 + (756,633) 255 + (758,847) 268 + (759,500) 253 + (760,340) 251 + (760,381) 270 + (762,962) 270 + (763,954) 236 + (764,392) 236 + (764,913) 258 + (766,915) 265 + (766,936) 259 + (767,372) 266 + (768,307) 266 + (770,458) 265 + (771,103) 241 + (771,487) 264 + (773,56) 248 + (774,773) 259 + (775,115) 266 + (776,537) 254 + (777,392) 258 + (778,893) 287 + (779,644) 270 + (780,256) 263 + (781,899) 261 + (782,399) 251 + (782,892) 277 + (783,614) 237 + (784,54) 231 + (785,816) 261 + (786,462) 248 + (787,876) 262 + (788,273) 276 + (789,696) 260 + (790,471) 251 + (791,793) 261 + (792,636) 264 + (792,955) 263 + (793,809) 269 + (794,986) 249 + (795,656) 253 + (796,347) 246 + (797,880) 264 + (798,802) 256 + (799,294) 267 + (800,970) 231 + (801,130) 244 + (803,896) 256 + (804,1022) 257 + (805,32) 232 + (805,479) 257 + (806,889) 245 + (807,504) 251 + (809,719) 272 + (809,737) 270 + (810,646) 241 + (811,547) 238 + (812,375) 262 + (813,200) 257 + (815,408) 252 + (816,902) 256 + (817,430) 241 + (818,985) 256 + (819,688) 254 + (821,839) 257 + (822,747) 262 + (823,39) 259 + (824,886) 241 + (825,406) 247 + (826,814) 242 + (827,625) 266 + (828,407) 260 + (829,511) 254 + (830,915) 263 + (831,982) 266 + (832,1003) 246 + (833,362) 259 + (833,999) 258 + (834,136) 263 + (834,295) 267 + (835,115) 281 + (836,218) 272 + (837,565) 285 + (839,541) 280 + (839,711) 273 + (840,159) 251 + (841,103) 240 + (841,636) 271 + 
(842,136) 257 + (843,524) 254 + (844,114) 260 + (845,694) 268 + (846,533) 274 + (847,741) 243 + (848,483) 269 + (849,464) 257 + (850,302) 245 + (851,567) 248 + (852,150) 262 + (852,529) 258 + (853,623) 234 + (855,106) 265 + (856,1014) 261 + (857,151) 270 + (857,650) 280 + (858,781) 242 + (858,994) 242 + (859,508) 255 + (859,716) 284 + (862,636) 241 + (863,21) 242 + (864,1022) 242 + (865,972) 264 + (866,97) 243 + (867,48) 235 + (868,303) 249 + (869,364) 255 + (870,506) 241 + (871,453) 255 + (872,775) 259 + (873,173) 269 + (874,485) 249 + (875,168) 249 + (876,357) 243 + (877,722) 255 + (877,990) 267 + (880,176) 291 + (881,23) 268 + (882,608) 248 + (883,929) 251 + (884,643) 247 + (885,687) 259 + (887,487) 257 + (888,110) 266 + (888,943) 264 + (889,892) 267 + (890,628) 261 + (891,679) 258 + (892,653) 254 + (894,33) 258 + (895,37) 266 + (895,695) 269 + (896,390) 269 + (897,42) 265 + (900,687) 281 + (901,146) 241 + (901,605) 261 + (902,57) 230 + (903,1021) 250 + (904,808) 237 + (905,795) 271 + (906,479) 257 + (907,674) 277 + (909,456) 250 + (910,167) 265 + (911,548) 248 + (914,924) 250 + (915,366) 253 + (915,502) 238 + (916,420) 273 + (916,823) 247 + (918,480) 248 + (919,970) 259 + (920,608) 246 + (921,966) 230 + (923,216) 247 + (925,685) 275 + (926,755) 274 + (929,538) 268 + (930,13) 259 + (931,479) 250 + (933,860) 261 + (934,165) 250 + (935,351) 233 + (936,399) 244 + (938,215) 264 + (939,496) 276 + (939,748) 262 + (940,414) 242 + (941,586) 265 + (942,356) 274 + (943,31) 263 + (943,538) 262 + (944,109) 249 + (945,671) 258 + (946,246) 255 + (947,182) 262 + (948,628) 262 + (949,316) 238 + (950,1017) 259 + (951,221) 250 + (955,457) 237 + (955,823) 241 + (956,653) 258 + (957,656) 255 + (958,644) 238 + (959,667) 246 + (960,78) 247 + (961,828) 252 + (962,877) 269 + (963,397) 284 + (964,370) 262 + (965,504) 244 + (966,483) 246 + (967,1023) 246 + (968,400) 233 + (969,564) 254 + (970,856) 257 + (971,875) 243 + (972,549) 259 + (972,630) 240 + (974,934) 281 + (976,980) 247 + 
(977,347) 230 + (978,123) 258 + (980,371) 245 + (981,175) 258 + (983,58) 252 + (984,449) 248 + (984,582) 246 + (985,72) 253 + (985,743) 237 + (986,323) 248 + (987,120) 241 + (987,340) 266 + (988,172) 251 + (989,585) 241 + (990,514) 271 + (991,660) 256 + (992,15) 283 + (992,531) 277 + (993,87) 267 + (993,674) 252 + (994,992) 244 + (995,170) 269 + (997,946) 270 + (998,678) 251 + (999,167) 258 + (1001,877) 250 + (1002,286) 242 + (1004,250) 259 + (1006,1022) 248 + (1008,159) 264 + (1009,574) 258 + (1012,533) 270 + (1013,574) 273 + (1014,667) 247 + (1015,127) 244 + (1015,613) 245 + (1016,457) 246 + (1017,180) 267 + (1018,254) 237 + (1019,287) 248 + (1020,67) 261 + (1020,151) 248 + (1021,810) 239 + (1022,491) 268 + (1023,840) 264 + + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS double matrix, sparse by row + Diff actual, 1024 entries, memory: 32.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 0 + (5,560) 0 + (6,996) 0 + (7,183) 0 + (7,666) 0 + (8,896) 0 + (9,187) 0 + (10,446) 0 + (11,46) 0 + (11,955) 0 + (12,397) 0 + (12,953) 0 + (13,192) 0 + (14,421) 0 + (15,568) 0 + (16,788) 0 + (16,904) 0 + (17,928) 0 + (18,103) 0 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 0 + (21,975) 0 + (22,569) 0 + (23,310) 0 + (24,905) 0 + (25,241) 0 + (26,428) 0 + (28,107) 0 + (28,441) 0 + (30,694) 0 + (32,121) 0 + (33,81) 0 + (34,804) 0 + (36,451) 0 + (37,609) 0 + (38,138) 0 + (39,698) 0 + (40,950) 0 + (41,568) 0 + (42,324) 0 + (43,798) 0 + (46,208) 0 + (47,70) 0 + (48,336) 0 + (49,476) 0 + (50,35) 0 + (51,556) 0 + (52,999) 0 + (53,940) 0 + (54,558) 0 + (54,960) 0 + (55,979) 0 + (56,90) 0 + (57,846) 0 + (57,893) 0 + (58,35) 0 + (59,108) 0 + (60,479) 0 + (61,590) 0 + (62,771) 0 + (63,50) 0 + (64,268) 0 + (65,694) 0 + (66,719) 0 + (67,411) 0 + (68,324) 0 + (69,477) 0 + (70,539) 0 + (71,228) 0 + (72,297) 0 + (73,665) 0 + (75,855) 0 + (76,248) 0 + (77,433) 0 + (78,90) 0 + (81,754) 0 + (82,243) 0 + (84,253) 0 + (86,104) 0 + (87,657) 0 + (89,825) 0 + (90,37) 0 + 
(91,234) 0 + (91,519) 0 + (92,74) 0 + (92,218) 0 + (92,690) 0 + (93,486) 0 + (94,637) 0 + (94,722) 0 + (96,564) 0 + (97,748) 0 + (99,326) 0 + (100,281) 0 + (102,609) 0 + (103,621) 0 + (104,644) 0 + (106,652) 0 + (107,239) 0 + (107,522) 0 + (108,131) 0 + (109,884) 0 + (110,402) 0 + (111,905) 0 + (112,127) 0 + (112,779) 0 + (113,278) 0 + (114,519) 0 + (115,240) 0 + (116,198) 0 + (117,219) 0 + (117,338) 0 + (118,99) 0 + (120,477) 0 + (121,554) 0 + (121,715) 0 + (122,151) 0 + (123,621) 0 + (125,177) 0 + (126,36) 0 + (128,820) 0 + (128,835) 0 + (129,660) 0 + (130,623) 0 + (130,807) 0 + (131,253) 0 + (131,355) 0 + (132,570) 0 + (133,492) 0 + (134,821) 0 + (135,295) 0 + (136,108) 0 + (137,834) 0 + (138,288) 0 + (139,284) 0 + (139,945) 0 + (140,887) 0 + (141,199) 0 + (142,87) 0 + (142,225) 0 + (143,123) 0 + (144,574) 0 + (145,552) 0 + (146,194) 0 + (146,995) 0 + (148,357) 0 + (149,949) 0 + (150,717) 0 + (151,484) 0 + (156,290) 0 + (157,714) 0 + (157,974) 0 + (158,959) 0 + (160,297) 0 + (162,601) 0 + (163,816) 0 + (164,221) 0 + (165,396) 0 + (166,801) 0 + (167,879) 0 + (168,321) 0 + (169,901) 0 + (170,612) 0 + (171,15) 0 + (172,951) 0 + (174,0) 0 + (174,595) 0 + (175,669) 0 + (176,108) 0 + (176,188) 0 + (176,614) 0 + (176,781) 0 + (177,17) 0 + (178,631) 0 + (179,932) 0 + (180,830) 0 + (182,675) 0 + (182,1001) 0 + (183,692) 0 + (184,143) 0 + (185,450) 0 + (186,779) 0 + (187,997) 0 + (188,357) 0 + (189,111) 0 + (190,990) 0 + (192,644) 0 + (192,953) 0 + (193,135) 0 + (194,137) 0 + (195,922) 0 + (197,859) 0 + (198,910) 0 + (199,531) 0 + (201,907) 0 + (202,863) 0 + (203,865) 0 + (204,614) 0 + (207,826) 0 + (208,985) 0 + (209,808) 0 + (210,659) 0 + (211,71) 0 + (211,931) 0 + (212,426) 0 + (213,152) 0 + (214,928) 0 + (215,268) 0 + (216,550) 0 + (217,921) 0 + (218,704) 0 + (218,922) 0 + (219,66) 0 + (220,704) 0 + (221,56) 0 + (221,551) 0 + (222,545) 0 + (223,1016) 0 + (224,721) 0 + (225,935) 0 + (226,727) 0 + (228,743) 0 + (229,535) 0 + (230,382) 0 + (231,551) 0 + (232,897) 0 + 
(233,570) 0 + (234,520) 0 + (235,522) 0 + (236,221) 0 + (237,755) 0 + (238,964) 0 + (239,82) 0 + (240,388) 0 + (241,500) 0 + (242,124) 0 + (242,193) 0 + (242,621) 0 + (243,300) 0 + (244,588) 0 + (244,1004) 0 + (245,494) 0 + (246,326) 0 + (247,115) 0 + (247,147) 0 + (248,233) 0 + (250,485) 0 + (251,708) 0 + (252,197) 0 + (253,485) 0 + (254,40) 0 + (254,238) 0 + (255,895) 0 + (256,114) 0 + (257,461) 0 + (257,796) 0 + (258,233) 0 + (260,884) 0 + (261,945) 0 + (262,368) 0 + (264,755) 0 + (265,124) 0 + (266,352) 0 + (267,10) 0 + (268,234) 0 + (269,400) 0 + (270,877) 0 + (270,924) 0 + (271,944) 0 + (272,67) 0 + (273,100) 0 + (274,979) 0 + (276,333) 0 + (277,377) 0 + (279,877) 0 + (280,18) 0 + (281,449) 0 + (282,179) 0 + (283,1007) 0 + (284,595) 0 + (285,32) 0 + (286,37) 0 + (287,126) 0 + (287,394) 0 + (288,848) 0 + (290,317) 0 + (291,594) 0 + (292,562) 0 + (294,466) 0 + (294,960) 0 + (295,1) 0 + (295,106) 0 + (296,109) 0 + (296,183) 0 + (296,245) 0 + (297,912) 0 + (297,1006) 0 + (299,159) 0 + (300,554) 0 + (301,774) 0 + (302,30) 0 + (303,645) 0 + (304,229) 0 + (305,622) 0 + (307,264) 0 + (308,28) 0 + (309,328) 0 + (309,627) 0 + (310,357) 0 + (311,355) 0 + (312,61) 0 + (313,758) 0 + (314,571) 0 + (315,177) 0 + (315,298) 0 + (315,741) 0 + (316,177) 0 + (316,308) 0 + (317,323) 0 + (318,595) 0 + (319,126) 0 + (320,468) 0 + (321,73) 0 + (322,235) 0 + (323,375) 0 + (323,651) 0 + (324,549) 0 + (325,306) 0 + (325,487) 0 + (326,649) 0 + (327,704) 0 + (328,142) 0 + (329,176) 0 + (330,848) 0 + (330,965) 0 + (332,795) 0 + (334,695) 0 + (335,694) 0 + (336,775) 0 + (336,808) 0 + (337,608) 0 + (338,993) 0 + (339,680) 0 + (340,849) 0 + (341,36) 0 + (342,723) 0 + (343,678) 0 + (344,384) 0 + (344,680) 0 + (345,75) 0 + (347,996) 0 + (348,60) 0 + (348,821) 0 + (349,804) 0 + (350,282) 0 + (351,142) 0 + (351,937) 0 + (352,160) 0 + (353,536) 0 + (355,352) 0 + (356,340) 0 + (358,678) 0 + (360,679) 0 + (361,794) 0 + (361,989) 0 + (362,816) 0 + (363,206) 0 + (364,629) 0 + (365,990) 0 + (366,841) 
0 + (366,971) 0 + (367,888) 0 + (368,587) 0 + (369,684) 0 + (370,270) 0 + (371,327) 0 + (372,471) 0 + (373,88) 0 + (374,669) 0 + (375,992) 0 + (376,336) 0 + (377,86) 0 + (378,882) 0 + (379,592) 0 + (380,77) 0 + (380,643) 0 + (381,1012) 0 + (382,816) 0 + (383,711) 0 + (385,670) 0 + (386,537) 0 + (387,347) 0 + (388,494) 0 + (389,328) 0 + (389,733) 0 + (390,551) 0 + (391,59) 0 + (391,600) 0 + (394,692) 0 + (396,645) 0 + (397,835) 0 + (398,107) 0 + (398,246) 0 + (399,436) 0 + (400,172) 0 + (400,382) 0 + (401,790) 0 + (402,320) 0 + (403,40) 0 + (404,641) 0 + (405,49) 0 + (405,475) 0 + (407,320) 0 + (408,61) 0 + (410,754) 0 + (411,643) 0 + (412,949) 0 + (413,94) 0 + (414,991) 0 + (415,26) 0 + (416,575) 0 + (417,366) 0 + (418,160) 0 + (418,669) 0 + (419,209) 0 + (419,285) 0 + (420,748) 0 + (421,614) 0 + (422,177) 0 + (423,873) 0 + (424,542) 0 + (425,263) 0 + (426,377) 0 + (427,149) 0 + (428,950) 0 + (429,305) 0 + (430,718) 0 + (431,51) 0 + (432,857) 0 + (434,604) 0 + (435,152) 0 + (436,356) 0 + (437,105) 0 + (438,814) 0 + (440,338) 0 + (441,982) 0 + (442,880) 0 + (443,753) 0 + (444,669) 0 + (445,952) 0 + (446,741) 0 + (447,970) 0 + (448,646) 0 + (448,744) 0 + (449,835) 0 + (450,579) 0 + (451,147) 0 + (451,1017) 0 + (452,868) 0 + (453,26) 0 + (454,415) 0 + (454,668) 0 + (455,43) 0 + (456,849) 0 + (456,985) 0 + (457,218) 0 + (458,510) 0 + (459,737) 0 + (460,836) 0 + (461,849) 0 + (461,917) 0 + (462,900) 0 + (463,316) 0 + (464,762) 0 + (465,355) 0 + (465,801) 0 + (466,673) 0 + (467,112) 0 + (468,288) 0 + (470,889) 0 + (471,650) 0 + (473,121) 0 + (473,127) 0 + (474,487) 0 + (475,382) 0 + (476,44) 0 + (477,342) 0 + (478,786) 0 + (480,667) 0 + (481,558) 0 + (482,680) 0 + (483,517) 0 + (484,961) 0 + (485,274) 0 + (486,1015) 0 + (487,194) 0 + (489,802) 0 + (490,811) 0 + (491,319) 0 + (492,377) 0 + (494,432) 0 + (495,809) 0 + (496,267) 0 + (496,902) 0 + (498,194) 0 + (499,952) 0 + (500,84) 0 + (501,704) 0 + (503,519) 0 + (504,510) 0 + (504,887) 0 + (505,574) 0 + (507,643) 0 + 
(508,449) 0 + (512,892) 0 + (513,271) 0 + (514,404) 0 + (515,758) 0 + (517,369) 0 + (518,293) 0 + (519,786) 0 + (520,270) 0 + (521,1013) 0 + (522,284) 0 + (523,632) 0 + (524,945) 0 + (525,94) 0 + (525,362) 0 + (526,52) 0 + (527,61) 0 + (528,294) 0 + (529,145) 0 + (529,998) 0 + (530,112) 0 + (531,908) 0 + (533,674) 0 + (534,505) 0 + (535,660) 0 + (535,776) 0 + (536,500) 0 + (537,799) 0 + (538,492) 0 + (538,861) 0 + (540,245) 0 + (542,137) 0 + (545,658) 0 + (546,213) 0 + (547,767) 0 + (547,912) 0 + (547,1018) 0 + (548,46) 0 + (548,697) 0 + (549,602) 0 + (550,927) 0 + (552,710) 0 + (553,391) 0 + (554,351) 0 + (555,10) 0 + (556,26) 0 + (557,910) 0 + (558,552) 0 + (560,792) 0 + (561,597) 0 + (562,182) 0 + (562,862) 0 + (563,877) 0 + (564,310) 0 + (564,609) 0 + (565,490) 0 + (566,564) 0 + (566,607) 0 + (569,872) 0 + (570,465) 0 + (571,271) 0 + (571,919) 0 + (572,630) 0 + (574,603) 0 + (576,256) 0 + (579,274) 0 + (580,182) 0 + (581,445) 0 + (582,177) 0 + (583,118) 0 + (584,399) 0 + (585,433) 0 + (587,254) 0 + (588,914) 0 + (589,1016) 0 + (590,95) 0 + (590,802) 0 + (591,978) 0 + (592,527) 0 + (593,143) 0 + (594,430) 0 + (595,787) 0 + (596,677) 0 + (598,788) 0 + (599,127) 0 + (600,339) 0 + (601,478) 0 + (602,218) 0 + (603,759) 0 + (604,270) 0 + (605,76) 0 + (606,930) 0 + (608,832) 0 + (609,287) 0 + (610,794) 0 + (611,759) 0 + (612,1006) 0 + (613,398) 0 + (614,386) 0 + (615,115) 0 + (616,928) 0 + (617,30) 0 + (618,361) 0 + (619,996) 0 + (620,5) 0 + (620,337) 0 + (621,41) 0 + (623,44) 0 + (623,79) 0 + (623,966) 0 + (624,19) 0 + (624,242) 0 + (624,524) 0 + (625,683) 0 + (626,51) 0 + (627,361) 0 + (628,396) 0 + (629,882) 0 + (630,341) 0 + (631,49) 0 + (631,585) 0 + (632,73) 0 + (634,912) 0 + (635,882) 0 + (636,617) 0 + (637,285) 0 + (637,716) 0 + (638,113) 0 + (638,367) 0 + (639,616) 0 + (640,837) 0 + (641,457) 0 + (643,934) 0 + (647,783) 0 + (648,195) 0 + (649,614) 0 + (650,957) 0 + (651,281) 0 + (652,973) 0 + (653,60) 0 + (653,333) 0 + (654,605) 0 + (655,910) 0 + (656,349) 0 
+ (659,17) 0 + (660,591) 0 + (661,512) 0 + (663,767) 0 + (664,158) 0 + (665,77) 0 + (666,503) 0 + (667,951) 0 + (668,365) 0 + (669,300) 0 + (671,141) 0 + (671,565) 0 + (672,819) 0 + (674,36) 0 + (674,819) 0 + (675,454) 0 + (676,242) 0 + (677,289) 0 + (677,647) 0 + (678,802) 0 + (679,899) 0 + (680,398) 0 + (681,390) 0 + (681,699) 0 + (682,117) 0 + (683,110) 0 + (684,907) 0 + (685,17) 0 + (686,202) 0 + (687,45) 0 + (688,287) 0 + (689,502) 0 + (690,299) 0 + (691,392) 0 + (692,600) 0 + (694,378) 0 + (695,702) 0 + (696,102) 0 + (698,631) 0 + (699,152) 0 + (700,840) 0 + (701,323) 0 + (702,777) 0 + (703,132) 0 + (704,374) 0 + (705,579) 0 + (706,511) 0 + (707,76) 0 + (708,259) 0 + (708,925) 0 + (709,872) 0 + (709,873) 0 + (710,107) 0 + (710,293) 0 + (711,210) 0 + (711,462) 0 + (714,475) 0 + (715,172) 0 + (715,751) 0 + (716,697) 0 + (717,234) 0 + (717,620) 0 + (718,848) 0 + (719,331) 0 + (720,201) 0 + (720,725) 0 + (721,806) 0 + (722,415) 0 + (722,934) 0 + (723,675) 0 + (724,480) 0 + (726,337) 0 + (727,177) 0 + (728,797) 0 + (729,884) 0 + (730,767) 0 + (731,275) 0 + (732,910) 0 + (733,763) 0 + (734,574) 0 + (735,268) 0 + (736,115) 0 + (737,146) 0 + (737,912) 0 + (738,1023) 0 + (739,335) 0 + (740,596) 0 + (741,365) 0 + (741,630) 0 + (742,485) 0 + (743,186) 0 + (745,621) 0 + (745,645) 0 + (746,273) 0 + (747,91) 0 + (748,886) 0 + (749,59) 0 + (749,755) 0 + (751,348) 0 + (752,313) 0 + (752,742) 0 + (752,745) 0 + (753,472) 0 + (753,592) 0 + (754,1007) 0 + (756,633) 0 + (758,847) 0 + (759,500) 0 + (760,340) 0 + (760,381) 0 + (762,962) 0 + (763,954) 0 + (764,392) 0 + (764,913) 0 + (766,915) 0 + (766,936) 0 + (767,372) 0 + (768,307) 0 + (770,458) 0 + (771,103) 0 + (771,487) 0 + (773,56) 0 + (774,773) 0 + (775,115) 0 + (776,537) 0 + (777,392) 0 + (778,893) 0 + (779,644) 0 + (780,256) 0 + (781,899) 0 + (782,399) 0 + (782,892) 0 + (783,614) 0 + (784,54) 0 + (785,816) 0 + (786,462) 0 + (787,876) 0 + (788,273) 0 + (789,696) 0 + (790,471) 0 + (791,793) 0 + (792,636) 0 + (792,955) 0 + 
(793,809) 0 + (794,986) 0 + (795,656) 0 + (796,347) 0 + (797,880) 0 + (798,802) 0 + (799,294) 0 + (800,970) 0 + (801,130) 0 + (803,896) 0 + (804,1022) 0 + (805,32) 0 + (805,479) 0 + (806,889) 0 + (807,504) 0 + (809,719) 0 + (809,737) 0 + (810,646) 0 + (811,547) 0 + (812,375) 0 + (813,200) 0 + (815,408) 0 + (816,902) 0 + (817,430) 0 + (818,985) 0 + (819,688) 0 + (821,839) 0 + (822,747) 0 + (823,39) 0 + (824,886) 0 + (825,406) 0 + (826,814) 0 + (827,625) 0 + (828,407) 0 + (829,511) 0 + (830,915) 0 + (831,982) 0 + (832,1003) 0 + (833,362) 0 + (833,999) 0 + (834,136) 0 + (834,295) 0 + (835,115) 0 + (836,218) 0 + (837,565) 0 + (839,541) 0 + (839,711) 0 + (840,159) 0 + (841,103) 0 + (841,636) 0 + (842,136) 0 + (843,524) 0 + (844,114) 0 + (845,694) 0 + (846,533) 0 + (847,741) 0 + (848,483) 0 + (849,464) 0 + (850,302) 0 + (851,567) 0 + (852,150) 0 + (852,529) 0 + (853,623) 0 + (855,106) 0 + (856,1014) 0 + (857,151) 0 + (857,650) 0 + (858,781) 0 + (858,994) 0 + (859,508) 0 + (859,716) 0 + (862,636) 0 + (863,21) 0 + (864,1022) 0 + (865,972) 0 + (866,97) 0 + (867,48) 0 + (868,303) 0 + (869,364) 0 + (870,506) 0 + (871,453) 0 + (872,775) 0 + (873,173) 0 + (874,485) 0 + (875,168) 0 + (876,357) 0 + (877,722) 0 + (877,990) 0 + (880,176) 0 + (881,23) 0 + (882,608) 0 + (883,929) 0 + (884,643) 0 + (885,687) 0 + (887,487) 0 + (888,110) 0 + (888,943) 0 + (889,892) 0 + (890,628) 0 + (891,679) 0 + (892,653) 0 + (894,33) 0 + (895,37) 0 + (895,695) 0 + (896,390) 0 + (897,42) 0 + (900,687) 0 + (901,146) 0 + (901,605) 0 + (902,57) 0 + (903,1021) 0 + (904,808) 0 + (905,795) 0 + (906,479) 0 + (907,674) 0 + (909,456) 0 + (910,167) 0 + (911,548) 0 + (914,924) 0 + (915,366) 0 + (915,502) 0 + (916,420) 0 + (916,823) 0 + (918,480) 0 + (919,970) 0 + (920,608) 0 + (921,966) 0 + (923,216) 0 + (925,685) 0 + (926,755) 0 + (929,538) 0 + (930,13) 0 + (931,479) 0 + (933,860) 0 + (934,165) 0 + (935,351) 0 + (936,399) 0 + (938,215) 0 + (939,496) 0 + (939,748) 0 + (940,414) 0 + (941,586) 0 + (942,356) 0 + 
(943,31) 0 + (943,538) 0 + (944,109) 0 + (945,671) 0 + (946,246) 0 + (947,182) 0 + (948,628) 0 + (949,316) 0 + (950,1017) 0 + (951,221) 0 + (955,457) 0 + (955,823) 0 + (956,653) 0 + (957,656) 0 + (958,644) 0 + (959,667) 0 + (960,78) 0 + (961,828) 0 + (962,877) 0 + (963,397) 0 + (964,370) 0 + (965,504) 0 + (966,483) 0 + (967,1023) 0 + (968,400) 0 + (969,564) 0 + (970,856) 0 + (971,875) 0 + (972,549) 0 + (972,630) 0 + (974,934) 0 + (976,980) 0 + (977,347) 0 + (978,123) 0 + (980,371) 0 + (981,175) 0 + (983,58) 0 + (984,449) 0 + (984,582) 0 + (985,72) 0 + (985,743) 0 + (986,323) 0 + (987,120) 0 + (987,340) 0 + (988,172) 0 + (989,585) 0 + (990,514) 0 + (991,660) 0 + (992,15) 0 + (992,531) 0 + (993,87) 0 + (993,674) 0 + (994,992) 0 + (995,170) 0 + (997,946) 0 + (998,678) 0 + (999,167) 0 + (1001,877) 0 + (1002,286) 0 + (1004,250) 0 + (1006,1022) 0 + (1008,159) 0 + (1009,574) 0 + (1012,533) 0 + (1013,574) 0 + (1014,667) 0 + (1015,127) 0 + (1015,613) 0 + (1016,457) 0 + (1017,180) 0 + (1018,254) 0 + (1019,287) 0 + (1020,67) 0 + (1020,151) 0 + (1021,810) 0 + (1022,491) 0 + (1023,840) 0 + + + 1024x1024 GraphBLAS bool matrix, sparse by row + T actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... + work:1024 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. 
+inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 5120 values, invsparse = 205 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +5120 nonzeroes left to fill.. +2026 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 65536 bytes +1024 slots to fill +all pairs to bucket 5, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5304500 +inside enumify: 0x7f1ff5304500 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 5, no filling +done assigning buckets +bucket 5 has 1024 dots to do +LAUNCHING BUCKET CODE: 5 +Confiring spdnINside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_spdn +found memory-cached prog GB_jit_AxB_dot3_phase3_spdn + got kernel instance AxB_dot3_phase3_spdn_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_spdn_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_spdnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<32,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 2.7095ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 131 + + (0,478) 1 + (0,574) 2 + (2,376) zombie + (5,560) 3 + (6,996) 2 + (7,183) 0 + (7,666) 0 + (8,896) 2 + (9,187) 0 + (10,446) 2 + (11,46) 2 + (11,955) 2 + (12,397) 1 + (12,953) 0 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 0 + (17,928) 0 + (18,103) zombie + (19,821) 1 + (19,886) 0 + (20,474) 4 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 2 + (24,905) 0 + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 893 entries, memory: 28.2 KB + + (0,478) 1 + (0,574) 2 + (5,560) 3 + (6,996) 2 + (7,183) 0 + (7,666) 0 + (8,896) 2 + (9,187) 0 + (10,446) 2 + (11,46) 2 + (11,955) 2 + (12,397) 1 + (12,953) 0 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 0 + (17,928) 0 + (19,821) 1 + (19,886) 0 + (20,474) 4 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 2 + (24,905) 0 + (25,241) 0 + (26,428) 0 + (28,107) 2 + (32,121) 0 + (33,81) 2 + (37,609) 2 + (39,698) 1 + (41,568) 1 + (42,324) 0 + (43,798) 1 + (46,208) 0 + (47,70) 1 + (48,336) 1 + (49,476) 1 + (50,35) 0 + (51,556) 0 + (52,999) 1 + (53,940) 1 + (54,558) 0 + (54,960) 1 + (55,979) 1 + (56,90) 2 + (57,846) 3 + (57,893) 0 + (58,35) 0 + (59,108) 3 + (60,479) 1 + (61,590) 2 + (62,771) 0 + (63,50) 0 + (64,268) 3 + (66,719) 2 + (67,411) 2 + (68,324) 0 + (69,477) 0 + (70,539) 1 + (71,228) 3 + (72,297) 3 + (73,665) 0 + (75,855) 0 + (76,248) 0 + (77,433) 4 + (78,90) 3 + (81,754) 4 + (82,243) 2 + (84,253) 1 + (86,104) 3 + (87,657) 0 + (89,825) 2 + (90,37) 4 + (91,234) 1 + (91,519) 1 + (92,74) 3 + (92,218) 1 + (92,690) 1 + (93,486) 2 + (94,637) 0 + (94,722) 1 + (96,564) 1 + (99,326) 2 + (100,281) 1 + (102,609) 2 + (104,644) 0 + (106,652) 1 + (107,239) 0 + 
(107,522) 2 + (108,131) 1 + (109,884) 2 + (110,402) 3 + (111,905) 2 + (112,127) 0 + (112,779) 0 + (113,278) 0 + (114,519) 1 + (115,240) 4 + (117,219) 0 + (117,338) 2 + (118,99) 4 + (120,477) 1 + (121,554) 3 + (121,715) 3 + (122,151) 3 + (125,177) 5 + (128,820) 6 + (129,660) 0 + (130,623) 1 + (131,253) 1 + (131,355) 1 + (133,492) 1 + (134,821) 0 + (135,295) 2 + (136,108) 3 + (137,834) 2 + (138,288) 1 + (139,284) 2 + (139,945) 0 + (141,199) 1 + (142,87) 4 + (142,225) 1 + (143,123) 0 + (144,574) 0 + (146,194) 3 + (148,357) 0 + (149,949) 1 + (150,717) 2 + (151,484) 2 + (156,290) 2 + (157,714) 0 + (157,974) 1 + (160,297) 1 + (162,601) 2 + (163,816) 3 + (164,221) 1 + (165,396) 1 + (166,801) 3 + (167,879) 3 + (168,321) 0 + (169,901) 3 + (172,951) 1 + (176,108) 1 + (176,188) 1 + (176,614) 2 + (176,781) 1 + (178,631) 1 + (179,932) 2 + (180,830) 3 + (182,675) 1 + (182,1001) 2 + (183,692) 1 + (184,143) 2 + (185,450) 1 + (186,779) 0 + (187,997) 3 + (188,357) 1 + (189,111) 2 + (190,990) 1 + (192,644) 0 + (192,953) 0 + (193,135) 1 + (194,137) 4 + (195,922) 4 + (197,859) 1 + (198,910) 1 + (199,531) 3 + (201,907) 0 + (202,863) 1 + (203,865) 4 + (204,614) 3 + (207,826) 1 + (208,985) 2 + (209,808) 3 + (211,71) 4 + (211,931) 3 + (212,426) 0 + (213,152) 0 + (214,928) 0 + (215,268) 3 + (216,550) 3 + (217,921) 0 + (218,704) 2 + (218,922) 2 + (219,66) 1 + (220,704) 2 + (221,56) 1 + (221,551) 2 + (222,545) 1 + (223,1016) 2 + (224,721) 1 + (225,935) 1 + (226,727) 0 + (228,743) 4 + (229,535) 2 + (231,551) 3 + (232,897) 2 + (234,520) 2 + (235,522) 2 + (236,221) 3 + (237,755) 2 + (238,964) 2 + (239,82) 0 + (240,388) 0 + (241,500) 2 + (242,124) 3 + (242,193) 0 + (243,300) 0 + (244,588) 0 + (244,1004) 3 + (245,494) 0 + (246,326) 1 + (247,115) 1 + (247,147) 1 + (248,233) 0 + (250,485) 6 + (251,708) 0 + (252,197) 1 + (253,485) 5 + (254,40) 3 + (254,238) 0 + (255,895) 3 + (256,114) 0 + (257,461) 2 + (257,796) 0 + (258,233) 1 + (260,884) 2 + (261,945) 1 + (262,368) 2 + (264,755) 1 + (265,124) 1 + 
(266,352) 3 + (267,10) 1 + (268,234) 1 + (269,400) 1 + (270,877) 0 + (270,924) 0 + (271,944) 0 + (272,67) 3 + (273,100) 1 + (274,979) 4 + (276,333) 2 + (277,377) 0 + (279,877) 1 + (280,18) 3 + (281,449) 3 + (282,179) 2 + (283,1007) 2 + (285,32) 1 + (286,37) 2 + (287,394) 3 + (288,848) 0 + (290,317) 0 + (291,594) 1 + (294,466) 2 + (294,960) 0 + (295,1) 0 + (295,106) 2 + (296,109) 2 + (296,183) 0 + (296,245) 0 + (297,912) 1 + (299,159) 1 + (300,554) 1 + (301,774) 1 + (302,30) 1 + (303,645) 1 + (304,229) 1 + (305,622) 0 + (307,264) 3 + (308,28) 0 + (309,328) 4 + (309,627) 0 + (310,357) 1 + (311,355) 1 + (312,61) 2 + (314,571) 3 + (315,177) 3 + (315,741) 0 + (316,177) 3 + (316,308) 4 + (320,468) 1 + (321,73) 0 + (322,235) 2 + (323,375) 3 + (323,651) 3 + (324,549) 2 + (325,306) 1 + (325,487) 1 + (326,649) 2 + (327,704) 0 + (329,176) 2 + (330,848) 1 + (330,965) 2 + (332,795) 1 + (334,695) 1 + (336,808) 4 + (337,608) 1 + (338,993) 2 + (339,680) 0 + (340,849) 1 + (342,723) 2 + (343,678) 2 + (344,384) 3 + (344,680) 0 + (345,75) 0 + (347,996) 3 + (348,60) 3 + (348,821) 1 + (350,282) 1 + (352,160) 2 + (353,536) 1 + (355,352) 5 + (356,340) 2 + (358,678) 2 + (360,679) 1 + (361,794) 0 + (361,989) 3 + (362,816) 2 + (363,206) 4 + (364,629) 0 + (365,990) 0 + (366,841) 1 + (366,971) 0 + (367,888) 2 + (368,587) 0 + (369,684) 3 + (370,270) 1 + (372,471) 1 + (373,88) 1 + (375,992) 2 + (376,336) 3 + (377,86) 1 + (378,882) 1 + (379,592) 2 + (380,77) 2 + (380,643) 2 + (381,1012) 2 + (382,816) 2 + (383,711) 2 + (385,670) 1 + (386,537) 1 + (387,347) 2 + (388,494) 1 + (389,328) 3 + (390,551) 1 + (391,59) 2 + (391,600) 1 + (394,692) 4 + (396,645) 2 + (398,107) 3 + (398,246) 2 + (399,436) 3 + (400,172) 0 + (401,790) 3 + (402,320) 2 + (403,40) 2 + (404,641) 0 + (405,49) 0 + (405,475) 1 + (407,320) 3 + (408,61) 4 + (410,754) 3 + (411,643) 2 + (412,949) 1 + (413,94) 5 + (415,26) 1 + (416,575) 0 + (417,366) 3 + (418,160) 0 + (419,209) 1 + (421,614) 1 + (422,177) 2 + (423,873) 1 + (424,542) 3 + 
(425,263) 0 + (426,377) 0 + (427,149) 0 + (429,305) 0 + (430,718) 1 + (431,51) 0 + (432,857) 2 + (434,604) 0 + (435,152) 2 + (436,356) 1 + (437,105) 3 + (440,338) 0 + (441,982) 2 + (442,880) 1 + (443,753) 1 + (446,741) 0 + (448,646) 0 + (448,744) 2 + (450,579) 1 + (451,147) 0 + (451,1017) 0 + (452,868) 3 + (453,26) 1 + (454,415) 1 + (454,668) 0 + (455,43) 0 + (456,849) 1 + (456,985) 2 + (457,218) 2 + (458,510) 4 + (459,737) 2 + (460,836) 2 + (461,849) 0 + (461,917) 2 + (462,900) 1 + (463,316) 1 + (464,762) 1 + (465,355) 1 + (465,801) 1 + (466,673) 0 + (468,288) 1 + (470,889) 2 + (471,650) 1 + (473,121) 1 + (473,127) 2 + (474,487) 0 + (476,44) 0 + (477,342) 1 + (480,667) 1 + (481,558) 0 + (482,680) 1 + (483,517) 1 + (484,961) 1 + (485,274) 0 + (486,1015) 3 + (487,194) 1 + (489,802) 2 + (490,811) 1 + (491,319) 4 + (492,377) 1 + (494,432) 1 + (495,809) 0 + (496,267) 2 + (496,902) 1 + (498,194) 1 + (500,84) 0 + (501,704) 2 + (503,519) 2 + (504,510) 3 + (505,574) 1 + (507,643) 3 + (508,449) 3 + (512,892) 3 + (513,271) 2 + (517,369) 1 + (518,293) 2 + (520,270) 1 + (521,1013) 1 + (522,284) 1 + (524,945) 1 + (525,94) 5 + (525,362) 2 + (526,52) 1 + (527,61) 3 + (529,998) 0 + (531,908) 1 + (533,674) 4 + (535,660) 1 + (535,776) 1 + (536,500) 3 + (537,799) 2 + (538,492) 2 + (538,861) 1 + (540,245) 0 + (542,137) 2 + (545,658) 0 + (546,213) 1 + (547,767) 1 + (547,912) 3 + (547,1018) 1 + (548,46) 2 + (548,697) 0 + (549,602) 2 + (550,927) 2 + (553,391) 1 + (554,351) 2 + (555,10) 2 + (556,26) 2 + (557,910) 0 + (560,792) 0 + (562,182) 0 + (562,862) 1 + (563,877) 0 + (564,310) 3 + (564,609) 3 + (565,490) 0 + (566,564) 2 + (566,607) 1 + (569,872) 0 + (570,465) 1 + (571,271) 3 + (571,919) 1 + (574,603) 0 + (576,256) 4 + (579,274) 0 + (580,182) 0 + (581,445) 0 + (582,177) 3 + (583,118) 0 + (584,399) 1 + (585,433) 4 + (587,254) 2 + (588,914) 2 + (589,1016) 3 + (590,95) 3 + (590,802) 2 + (592,527) 0 + (593,143) 2 + (594,430) 0 + (595,787) 2 + (598,788) 1 + (599,127) 3 + (601,478) 2 + 
(602,218) 0 + (603,759) 1 + (604,270) 1 + (605,76) 3 + (606,930) 0 + (608,832) 1 + (609,287) 1 + (610,794) 0 + (611,759) 1 + (613,398) 3 + (614,386) 4 + (615,115) 0 + (616,928) 0 + (617,30) 2 + (618,361) 5 + (619,996) 4 + (620,5) 3 + (621,41) 0 + (623,44) 2 + (624,19) 1 + (624,242) 2 + (624,524) 1 + (626,51) 0 + (627,361) 1 + (628,396) 3 + (629,882) 1 + (630,341) 1 + (631,49) 1 + (631,585) 1 + (632,73) 1 + (634,912) 2 + (635,882) 1 + (636,617) 1 + (637,716) 0 + (638,113) 1 + (639,616) 5 + (640,837) 2 + (641,457) 1 + (643,934) 3 + (647,783) 2 + (648,195) 1 + (649,614) 1 + (650,957) 1 + (651,281) 2 + (652,973) 1 + (653,60) 1 + (653,333) 2 + (654,605) 3 + (655,910) 0 + (656,349) 3 + (660,591) 4 + (661,512) 2 + (663,767) 0 + (665,77) 3 + (666,503) 4 + (667,951) 2 + (668,365) 4 + (669,300) 1 + (671,141) 1 + (671,565) 2 + (672,819) 1 + (674,819) 1 + (675,454) 0 + (676,242) 2 + (677,289) 4 + (678,802) 3 + (680,398) 1 + (681,390) 1 + (682,117) 4 + (683,110) 2 + (684,907) 0 + (686,202) 0 + (687,45) 1 + (688,287) 2 + (689,502) 3 + (690,299) 3 + (691,392) 2 + (692,600) 0 + (694,378) 1 + (695,702) 1 + (696,102) 2 + (698,631) 0 + (699,152) 1 + (700,840) 1 + (702,777) 1 + (703,132) 1 + (704,374) 1 + (705,579) 1 + (706,511) 3 + (707,76) 3 + (708,259) 2 + (708,925) 0 + (709,872) 1 + (709,873) 1 + (710,107) 3 + (710,293) 2 + (711,462) 0 + (714,475) 2 + (715,172) 0 + (715,751) 2 + (716,697) 0 + (717,234) 0 + (718,848) 2 + (719,331) 1 + (720,201) 1 + (720,725) 2 + (722,415) 2 + (722,934) 2 + (723,675) 2 + (724,480) 3 + (727,177) 4 + (728,797) 1 + (729,884) 1 + (730,767) 0 + (731,275) 1 + (732,910) 0 + (733,763) 5 + (734,574) 0 + (735,268) 3 + (736,115) 1 + (737,912) 2 + (738,1023) 2 + (739,335) 0 + (740,596) 3 + (741,365) 1 + (742,485) 5 + (743,186) 1 + (745,645) 2 + (746,273) 3 + (747,91) 5 + (748,886) 0 + (749,59) 2 + (749,755) 2 + (751,348) 0 + (752,313) 2 + (752,742) 0 + (752,745) 1 + (753,472) 1 + (753,592) 1 + (754,1007) 0 + (756,633) 1 + (758,847) 2 + (759,500) 3 + (760,340) 2 
+ (760,381) 2 + (762,962) 3 + (763,954) 0 + (764,392) 1 + (764,913) 3 + (766,915) 3 + (766,936) 0 + (767,372) 1 + (768,307) 0 + (770,458) 0 + (771,487) 0 + (773,56) 1 + (774,773) 0 + (775,115) 1 + (776,537) 1 + (777,392) 1 + (778,893) 0 + (779,644) 0 + (780,256) 2 + (782,399) 1 + (782,892) 2 + (783,614) 2 + (785,816) 1 + (786,462) 1 + (787,876) 1 + (788,273) 4 + (789,696) 2 + (790,471) 1 + (791,793) 3 + (792,636) 3 + (792,955) 3 + (793,809) 0 + (794,986) 1 + (795,656) 0 + (796,347) 3 + (797,880) 2 + (798,802) 0 + (801,130) 1 + (803,896) 3 + (804,1022) 3 + (805,32) 1 + (805,479) 1 + (806,889) 2 + (807,504) 3 + (809,719) 1 + (809,737) 2 + (810,646) 0 + (812,375) 3 + (813,200) 2 + (815,408) 3 + (816,902) 1 + (817,430) 1 + (818,985) 5 + (819,688) 1 + (821,839) 1 + (822,747) 1 + (823,39) 1 + (824,886) 0 + (825,406) 0 + (828,407) 2 + (829,511) 1 + (830,915) 2 + (831,982) 1 + (832,1003) 2 + (833,362) 2 + (833,999) 2 + (834,136) 2 + (834,295) 1 + (835,115) 1 + (836,218) 2 + (837,565) 4 + (839,541) 0 + (839,711) 0 + (840,159) 1 + (841,636) 1 + (842,136) 2 + (843,524) 0 + (844,114) 0 + (846,533) 1 + (847,741) 0 + (848,483) 1 + (849,464) 3 + (850,302) 0 + (851,567) 1 + (852,150) 4 + (852,529) 0 + (853,623) 1 + (855,106) 2 + (856,1014) 1 + (857,151) 2 + (857,650) 1 + (858,781) 1 + (858,994) 0 + (859,508) 0 + (859,716) 0 + (862,636) 2 + (863,21) 4 + (864,1022) 2 + (866,97) 0 + (867,48) 1 + (868,303) 1 + (869,364) 4 + (871,453) 1 + (873,173) 0 + (874,485) 7 + (875,168) 1 + (876,357) 0 + (877,722) 1 + (877,990) 0 + (880,176) 2 + (881,23) 1 + (882,608) 0 + (884,643) 3 + (885,687) 0 + (887,487) 0 + (888,110) 2 + (888,943) 0 + (889,892) 3 + (890,628) 2 + (891,679) 1 + (892,653) 2 + (894,33) 0 + (895,37) 2 + (895,695) 0 + (896,390) 0 + (897,42) 2 + (900,687) 0 + (901,605) 2 + (902,57) 1 + (903,1021) 1 + (904,808) 4 + (905,795) 3 + (906,479) 0 + (907,674) 2 + (909,456) 2 + (911,548) 1 + (914,924) 1 + (915,366) 2 + (915,502) 3 + (916,420) 3 + (916,823) 1 + (918,480) 3 + (920,608) 1 + 
(925,685) 0 + (926,755) 4 + (929,538) 0 + (930,13) 1 + (931,479) 3 + (933,860) 0 + (934,165) 0 + (935,351) 2 + (936,399) 1 + (938,215) 0 + (939,496) 0 + (940,414) 0 + (941,586) 5 + (942,356) 1 + (943,31) 4 + (943,538) 0 + (944,109) 3 + (945,671) 1 + (946,246) 3 + (947,182) 0 + (948,628) 2 + (949,316) 0 + (950,1017) 0 + (951,221) 2 + (955,457) 1 + (955,823) 0 + (956,653) 2 + (957,656) 0 + (958,644) 0 + (959,667) 2 + (960,78) 3 + (961,828) 4 + (962,877) 1 + (963,397) 1 + (964,370) 1 + (965,504) 3 + (966,483) 2 + (967,1023) 2 + (968,400) 0 + (969,564) 1 + (970,856) 1 + (971,875) 1 + (972,549) 1 + (974,934) 2 + (977,347) 3 + (978,123) 0 + (981,175) 3 + (983,58) 1 + (984,449) 1 + (984,582) 2 + (985,72) 1 + (985,743) 2 + (987,120) 2 + (987,340) 4 + (988,172) 0 + (989,585) 2 + (991,660) 1 + (992,531) 3 + (993,87) 2 + (993,674) 2 + (994,992) 2 + (995,170) 2 + (997,946) 1 + (998,678) 2 + (1001,877) 1 + (1002,286) 2 + (1004,250) 3 + (1006,1022) 3 + (1008,159) 1 + (1009,574) 0 + (1012,533) 1 + (1013,574) 1 + (1014,667) 3 + (1015,127) 1 + (1015,613) 2 + (1016,457) 1 + (1017,180) 2 + (1018,254) 2 + (1019,287) 3 + (1020,67) 3 + (1020,151) 2 + (1021,810) 1 + (1022,491) 0 + (1023,840) 2 + + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + C_actual, 893 entries, memory: 28.2 KB + + (0,478) 1 + (0,574) 2 + (5,560) 3 + (6,996) 2 + (7,183) 0 + (7,666) 0 + (8,896) 2 + (9,187) 0 + (10,446) 2 + (11,46) 2 + (11,955) 2 + (12,397) 1 + (12,953) 0 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 0 + (17,928) 0 + (19,821) 1 + (19,886) 0 + (20,474) 4 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 2 + (24,905) 0 + (25,241) 0 + (26,428) 0 + (28,107) 2 + (32,121) 0 + (33,81) 2 + (37,609) 2 + (39,698) 1 + (41,568) 1 + (42,324) 0 + (43,798) 1 + (46,208) 0 + (47,70) 1 + (48,336) 1 + (49,476) 1 + (50,35) 0 + (51,556) 0 + (52,999) 1 + (53,940) 1 + (54,558) 0 + (54,960) 1 + (55,979) 1 + (56,90) 2 + (57,846) 3 + (57,893) 0 + (58,35) 0 + (59,108) 3 + (60,479) 1 + (61,590) 2 + 
(62,771) 0 + (63,50) 0 + (64,268) 3 + (66,719) 2 + (67,411) 2 + (68,324) 0 + (69,477) 0 + (70,539) 1 + (71,228) 3 + (72,297) 3 + (73,665) 0 + (75,855) 0 + (76,248) 0 + (77,433) 4 + (78,90) 3 + (81,754) 4 + (82,243) 2 + (84,253) 1 + (86,104) 3 + (87,657) 0 + (89,825) 2 + (90,37) 4 + (91,234) 1 + (91,519) 1 + (92,74) 3 + (92,218) 1 + (92,690) 1 + (93,486) 2 + (94,637) 0 + (94,722) 1 + (96,564) 1 + (99,326) 2 + (100,281) 1 + (102,609) 2 + (104,644) 0 + (106,652) 1 + (107,239) 0 + (107,522) 2 + (108,131) 1 + (109,884) 2 + (110,402) 3 + (111,905) 2 + (112,127) 0 + (112,779) 0 + (113,278) 0 + (114,519) 1 + (115,240) 4 + (117,219) 0 + (117,338) 2 + (118,99) 4 + (120,477) 1 + (121,554) 3 + (121,715) 3 + (122,151) 3 + (125,177) 5 + (128,820) 6 + (129,660) 0 + (130,623) 1 + (131,253) 1 + (131,355) 1 + (133,492) 1 + (134,821) 0 + (135,295) 2 + (136,108) 3 + (137,834) 2 + (138,288) 1 + (139,284) 2 + (139,945) 0 + (141,199) 1 + (142,87) 4 + (142,225) 1 + (143,123) 0 + (144,574) 0 + (146,194) 3 + (148,357) 0 + (149,949) 1 + (150,717) 2 + (151,484) 2 + (156,290) 2 + (157,714) 0 + (157,974) 1 + (160,297) 1 + (162,601) 2 + (163,816) 3 + (164,221) 1 + (165,396) 1 + (166,801) 3 + (167,879) 3 + (168,321) 0 + (169,901) 3 + (172,951) 1 + (176,108) 1 + (176,188) 1 + (176,614) 2 + (176,781) 1 + (178,631) 1 + (179,932) 2 + (180,830) 3 + (182,675) 1 + (182,1001) 2 + (183,692) 1 + (184,143) 2 + (185,450) 1 + (186,779) 0 + (187,997) 3 + (188,357) 1 + (189,111) 2 + (190,990) 1 + (192,644) 0 + (192,953) 0 + (193,135) 1 + (194,137) 4 + (195,922) 4 + (197,859) 1 + (198,910) 1 + (199,531) 3 + (201,907) 0 + (202,863) 1 + (203,865) 4 + (204,614) 3 + (207,826) 1 + (208,985) 2 + (209,808) 3 + (211,71) 4 + (211,931) 3 + (212,426) 0 + (213,152) 0 + (214,928) 0 + (215,268) 3 + (216,550) 3 + (217,921) 0 + (218,704) 2 + (218,922) 2 + (219,66) 1 + (220,704) 2 + (221,56) 1 + (221,551) 2 + (222,545) 1 + (223,1016) 2 + (224,721) 1 + (225,935) 1 + (226,727) 0 + (228,743) 4 + (229,535) 2 + (231,551) 3 + 
(232,897) 2 + (234,520) 2 + (235,522) 2 + (236,221) 3 + (237,755) 2 + (238,964) 2 + (239,82) 0 + (240,388) 0 + (241,500) 2 + (242,124) 3 + (242,193) 0 + (243,300) 0 + (244,588) 0 + (244,1004) 3 + (245,494) 0 + (246,326) 1 + (247,115) 1 + (247,147) 1 + (248,233) 0 + (250,485) 6 + (251,708) 0 + (252,197) 1 + (253,485) 5 + (254,40) 3 + (254,238) 0 + (255,895) 3 + (256,114) 0 + (257,461) 2 + (257,796) 0 + (258,233) 1 + (260,884) 2 + (261,945) 1 + (262,368) 2 + (264,755) 1 + (265,124) 1 + (266,352) 3 + (267,10) 1 + (268,234) 1 + (269,400) 1 + (270,877) 0 + (270,924) 0 + (271,944) 0 + (272,67) 3 + (273,100) 1 + (274,979) 4 + (276,333) 2 + (277,377) 0 + (279,877) 1 + (280,18) 3 + (281,449) 3 + (282,179) 2 + (283,1007) 2 + (285,32) 1 + (286,37) 2 + (287,394) 3 + (288,848) 0 + (290,317) 0 + (291,594) 1 + (294,466) 2 + (294,960) 0 + (295,1) 0 + (295,106) 2 + (296,109) 2 + (296,183) 0 + (296,245) 0 + (297,912) 1 + (299,159) 1 + (300,554) 1 + (301,774) 1 + (302,30) 1 + (303,645) 1 + (304,229) 1 + (305,622) 0 + (307,264) 3 + (308,28) 0 + (309,328) 4 + (309,627) 0 + (310,357) 1 + (311,355) 1 + (312,61) 2 + (314,571) 3 + (315,177) 3 + (315,741) 0 + (316,177) 3 + (316,308) 4 + (320,468) 1 + (321,73) 0 + (322,235) 2 + (323,375) 3 + (323,651) 3 + (324,549) 2 + (325,306) 1 + (325,487) 1 + (326,649) 2 + (327,704) 0 + (329,176) 2 + (330,848) 1 + (330,965) 2 + (332,795) 1 + (334,695) 1 + (336,808) 4 + (337,608) 1 + (338,993) 2 + (339,680) 0 + (340,849) 1 + (342,723) 2 + (343,678) 2 + (344,384) 3 + (344,680) 0 + (345,75) 0 + (347,996) 3 + (348,60) 3 + (348,821) 1 + (350,282) 1 + (352,160) 2 + (353,536) 1 + (355,352) 5 + (356,340) 2 + (358,678) 2 + (360,679) 1 + (361,794) 0 + (361,989) 3 + (362,816) 2 + (363,206) 4 + (364,629) 0 + (365,990) 0 + (366,841) 1 + (366,971) 0 + (367,888) 2 + (368,587) 0 + (369,684) 3 + (370,270) 1 + (372,471) 1 + (373,88) 1 + (375,992) 2 + (376,336) 3 + (377,86) 1 + (378,882) 1 + (379,592) 2 + (380,77) 2 + (380,643) 2 + (381,1012) 2 + (382,816) 2 + (383,711) 2 
+ (385,670) 1 + (386,537) 1 + (387,347) 2 + (388,494) 1 + (389,328) 3 + (390,551) 1 + (391,59) 2 + (391,600) 1 + (394,692) 4 + (396,645) 2 + (398,107) 3 + (398,246) 2 + (399,436) 3 + (400,172) 0 + (401,790) 3 + (402,320) 2 + (403,40) 2 + (404,641) 0 + (405,49) 0 + (405,475) 1 + (407,320) 3 + (408,61) 4 + (410,754) 3 + (411,643) 2 + (412,949) 1 + (413,94) 5 + (415,26) 1 + (416,575) 0 + (417,366) 3 + (418,160) 0 + (419,209) 1 + (421,614) 1 + (422,177) 2 + (423,873) 1 + (424,542) 3 + (425,263) 0 + (426,377) 0 + (427,149) 0 + (429,305) 0 + (430,718) 1 + (431,51) 0 + (432,857) 2 + (434,604) 0 + (435,152) 2 + (436,356) 1 + (437,105) 3 + (440,338) 0 + (441,982) 2 + (442,880) 1 + (443,753) 1 + (446,741) 0 + (448,646) 0 + (448,744) 2 + (450,579) 1 + (451,147) 0 + (451,1017) 0 + (452,868) 3 + (453,26) 1 + (454,415) 1 + (454,668) 0 + (455,43) 0 + (456,849) 1 + (456,985) 2 + (457,218) 2 + (458,510) 4 + (459,737) 2 + (460,836) 2 + (461,849) 0 + (461,917) 2 + (462,900) 1 + (463,316) 1 + (464,762) 1 + (465,355) 1 + (465,801) 1 + (466,673) 0 + (468,288) 1 + (470,889) 2 + (471,650) 1 + (473,121) 1 + (473,127) 2 + (474,487) 0 + (476,44) 0 + (477,342) 1 + (480,667) 1 + (481,558) 0 + (482,680) 1 + (483,517) 1 + (484,961) 1 + (485,274) 0 + (486,1015) 3 + (487,194) 1 + (489,802) 2 + (490,811) 1 + (491,319) 4 + (492,377) 1 + (494,432) 1 + (495,809) 0 + (496,267) 2 + (496,902) 1 + (498,194) 1 + (500,84) 0 + (501,704) 2 + (503,519) 2 + (504,510) 3 + (505,574) 1 + (507,643) 3 + (508,449) 3 + (512,892) 3 + (513,271) 2 + (517,369) 1 + (518,293) 2 + (520,270) 1 + (521,1013) 1 + (522,284) 1 + (524,945) 1 + (525,94) 5 + (525,362) 2 + (526,52) 1 + (527,61) 3 + (529,998) 0 + (531,908) 1 + (533,674) 4 + (535,660) 1 + (535,776) 1 + (536,500) 3 + (537,799) 2 + (538,492) 2 + (538,861) 1 + (540,245) 0 + (542,137) 2 + (545,658) 0 + (546,213) 1 + (547,767) 1 + (547,912) 3 + (547,1018) 1 + (548,46) 2 + (548,697) 0 + (549,602) 2 + (550,927) 2 + (553,391) 1 + (554,351) 2 + (555,10) 2 + (556,26) 2 + 
(557,910) 0 + (560,792) 0 + (562,182) 0 + (562,862) 1 + (563,877) 0 + (564,310) 3 + (564,609) 3 + (565,490) 0 + (566,564) 2 + (566,607) 1 + (569,872) 0 + (570,465) 1 + (571,271) 3 + (571,919) 1 + (574,603) 0 + (576,256) 4 + (579,274) 0 + (580,182) 0 + (581,445) 0 + (582,177) 3 + (583,118) 0 + (584,399) 1 + (585,433) 4 + (587,254) 2 + (588,914) 2 + (589,1016) 3 + (590,95) 3 + (590,802) 2 + (592,527) 0 + (593,143) 2 + (594,430) 0 + (595,787) 2 + (598,788) 1 + (599,127) 3 + (601,478) 2 + (602,218) 0 + (603,759) 1 + (604,270) 1 + (605,76) 3 + (606,930) 0 + (608,832) 1 + (609,287) 1 + (610,794) 0 + (611,759) 1 + (613,398) 3 + (614,386) 4 + (615,115) 0 + (616,928) 0 + (617,30) 2 + (618,361) 5 + (619,996) 4 + (620,5) 3 + (621,41) 0 + (623,44) 2 + (624,19) 1 + (624,242) 2 + (624,524) 1 + (626,51) 0 + (627,361) 1 + (628,396) 3 + (629,882) 1 + (630,341) 1 + (631,49) 1 + (631,585) 1 + (632,73) 1 + (634,912) 2 + (635,882) 1 + (636,617) 1 + (637,716) 0 + (638,113) 1 + (639,616) 5 + (640,837) 2 + (641,457) 1 + (643,934) 3 + (647,783) 2 + (648,195) 1 + (649,614) 1 + (650,957) 1 + (651,281) 2 + (652,973) 1 + (653,60) 1 + (653,333) 2 + (654,605) 3 + (655,910) 0 + (656,349) 3 + (660,591) 4 + (661,512) 2 + (663,767) 0 + (665,77) 3 + (666,503) 4 + (667,951) 2 + (668,365) 4 + (669,300) 1 + (671,141) 1 + (671,565) 2 + (672,819) 1 + (674,819) 1 + (675,454) 0 + (676,242) 2 + (677,289) 4 + (678,802) 3 + (680,398) 1 + (681,390) 1 + (682,117) 4 + (683,110) 2 + (684,907) 0 + (686,202) 0 + (687,45) 1 + (688,287) 2 + (689,502) 3 + (690,299) 3 + (691,392) 2 + (692,600) 0 + (694,378) 1 + (695,702) 1 + (696,102) 2 + (698,631) 0 + (699,152) 1 + (700,840) 1 + (702,777) 1 + (703,132) 1 + (704,374) 1 + (705,579) 1 + (706,511) 3 + (707,76) 3 + (708,259) 2 + (708,925) 0 + (709,872) 1 + (709,873) 1 + (710,107) 3 + (710,293) 2 + (711,462) 0 + (714,475) 2 + (715,172) 0 + (715,751) 2 + (716,697) 0 + (717,234) 0 + (718,848) 2 + (719,331) 1 + (720,201) 1 + (720,725) 2 + (722,415) 2 + (722,934) 2 + (723,675) 2 
+ (724,480) 3 + (727,177) 4 + (728,797) 1 + (729,884) 1 + (730,767) 0 + (731,275) 1 + (732,910) 0 + (733,763) 5 + (734,574) 0 + (735,268) 3 + (736,115) 1 + (737,912) 2 + (738,1023) 2 + (739,335) 0 + (740,596) 3 + (741,365) 1 + (742,485) 5 + (743,186) 1 + (745,645) 2 + (746,273) 3 + (747,91) 5 + (748,886) 0 + (749,59) 2 + (749,755) 2 + (751,348) 0 + (752,313) 2 + (752,742) 0 + (752,745) 1 + (753,472) 1 + (753,592) 1 + (754,1007) 0 + (756,633) 1 + (758,847) 2 + (759,500) 3 + (760,340) 2 + (760,381) 2 + (762,962) 3 + (763,954) 0 + (764,392) 1 + (764,913) 3 + (766,915) 3 + (766,936) 0 + (767,372) 1 + (768,307) 0 + (770,458) 0 + (771,487) 0 + (773,56) 1 + (774,773) 0 + (775,115) 1 + (776,537) 1 + (777,392) 1 + (778,893) 0 + (779,644) 0 + (780,256) 2 + (782,399) 1 + (782,892) 2 + (783,614) 2 + (785,816) 1 + (786,462) 1 + (787,876) 1 + (788,273) 4 + (789,696) 2 + (790,471) 1 + (791,793) 3 + (792,636) 3 + (792,955) 3 + (793,809) 0 + (794,986) 1 + (795,656) 0 + (796,347) 3 + (797,880) 2 + (798,802) 0 + (801,130) 1 + (803,896) 3 + (804,1022) 3 + (805,32) 1 + (805,479) 1 + (806,889) 2 + (807,504) 3 + (809,719) 1 + (809,737) 2 + (810,646) 0 + (812,375) 3 + (813,200) 2 + (815,408) 3 + (816,902) 1 + (817,430) 1 + (818,985) 5 + (819,688) 1 + (821,839) 1 + (822,747) 1 + (823,39) 1 + (824,886) 0 + (825,406) 0 + (828,407) 2 + (829,511) 1 + (830,915) 2 + (831,982) 1 + (832,1003) 2 + (833,362) 2 + (833,999) 2 + (834,136) 2 + (834,295) 1 + (835,115) 1 + (836,218) 2 + (837,565) 4 + (839,541) 0 + (839,711) 0 + (840,159) 1 + (841,636) 1 + (842,136) 2 + (843,524) 0 + (844,114) 0 + (846,533) 1 + (847,741) 0 + (848,483) 1 + (849,464) 3 + (850,302) 0 + (851,567) 1 + (852,150) 4 + (852,529) 0 + (853,623) 1 + (855,106) 2 + (856,1014) 1 + (857,151) 2 + (857,650) 1 + (858,781) 1 + (858,994) 0 + (859,508) 0 + (859,716) 0 + (862,636) 2 + (863,21) 4 + (864,1022) 2 + (866,97) 0 + (867,48) 1 + (868,303) 1 + (869,364) 4 + (871,453) 1 + (873,173) 0 + (874,485) 7 + (875,168) 1 + (876,357) 0 + (877,722) 1 
+ (877,990) 0 + (880,176) 2 + (881,23) 1 + (882,608) 0 + (884,643) 3 + (885,687) 0 + (887,487) 0 + (888,110) 2 + (888,943) 0 + (889,892) 3 + (890,628) 2 + (891,679) 1 + (892,653) 2 + (894,33) 0 + (895,37) 2 + (895,695) 0 + (896,390) 0 + (897,42) 2 + (900,687) 0 + (901,605) 2 + (902,57) 1 + (903,1021) 1 + (904,808) 4 + (905,795) 3 + (906,479) 0 + (907,674) 2 + (909,456) 2 + (911,548) 1 + (914,924) 1 + (915,366) 2 + (915,502) 3 + (916,420) 3 + (916,823) 1 + (918,480) 3 + (920,608) 1 + (925,685) 0 + (926,755) 4 + (929,538) 0 + (930,13) 1 + (931,479) 3 + (933,860) 0 + (934,165) 0 + (935,351) 2 + (936,399) 1 + (938,215) 0 + (939,496) 0 + (940,414) 0 + (941,586) 5 + (942,356) 1 + (943,31) 4 + (943,538) 0 + (944,109) 3 + (945,671) 1 + (946,246) 3 + (947,182) 0 + (948,628) 2 + (949,316) 0 + (950,1017) 0 + (951,221) 2 + (955,457) 1 + (955,823) 0 + (956,653) 2 + (957,656) 0 + (958,644) 0 + (959,667) 2 + (960,78) 3 + (961,828) 4 + (962,877) 1 + (963,397) 1 + (964,370) 1 + (965,504) 3 + (966,483) 2 + (967,1023) 2 + (968,400) 0 + (969,564) 1 + (970,856) 1 + (971,875) 1 + (972,549) 1 + (974,934) 2 + (977,347) 3 + (978,123) 0 + (981,175) 3 + (983,58) 1 + (984,449) 1 + (984,582) 2 + (985,72) 1 + (985,743) 2 + (987,120) 2 + (987,340) 4 + (988,172) 0 + (989,585) 2 + (991,660) 1 + (992,531) 3 + (993,87) 2 + (993,674) 2 + (994,992) 2 + (995,170) 2 + (997,946) 1 + (998,678) 2 + (1001,877) 1 + (1002,286) 2 + (1004,250) 3 + (1006,1022) 3 + (1008,159) 1 + (1009,574) 0 + (1012,533) 1 + (1013,574) 1 + (1014,667) 3 + (1015,127) 1 + (1015,613) 2 + (1016,457) 1 + (1017,180) 2 + (1018,254) 2 + (1019,287) 3 + (1020,67) 3 + (1020,151) 2 + (1021,810) 1 + (1022,491) 0 + (1023,840) 2 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, sparse by row + Diff actual, 893 entries, memory: 32.2 KB + + (0,478) 0 + (0,574) 0 + (5,560) 0 + (6,996) 0 + (7,183) 0 + (7,666) 0 + (8,896) 0 + (9,187) 0 + (10,446) 0 + (11,46) 0 + (11,955) 0 + (12,397) 0 + (12,953) 0 + 
(13,192) 0 + (14,421) 0 + (15,568) 0 + (16,788) 0 + (16,904) 0 + (17,928) 0 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 0 + (21,975) 0 + (22,569) 0 + (23,310) 0 + (24,905) 0 + (25,241) 0 + (26,428) 0 + (28,107) 0 + (32,121) 0 + (33,81) 0 + (37,609) 0 + (39,698) 0 + (41,568) 0 + (42,324) 0 + (43,798) 0 + (46,208) 0 + (47,70) 0 + (48,336) 0 + (49,476) 0 + (50,35) 0 + (51,556) 0 + (52,999) 0 + (53,940) 0 + (54,558) 0 + (54,960) 0 + (55,979) 0 + (56,90) 0 + (57,846) 0 + (57,893) 0 + (58,35) 0 + (59,108) 0 + (60,479) 0 + (61,590) 0 + (62,771) 0 + (63,50) 0 + (64,268) 0 + (66,719) 0 + (67,411) 0 + (68,324) 0 + (69,477) 0 + (70,539) 0 + (71,228) 0 + (72,297) 0 + (73,665) 0 + (75,855) 0 + (76,248) 0 + (77,433) 0 + (78,90) 0 + (81,754) 0 + (82,243) 0 + (84,253) 0 + (86,104) 0 + (87,657) 0 + (89,825) 0 + (90,37) 0 + (91,234) 0 + (91,519) 0 + (92,74) 0 + (92,218) 0 + (92,690) 0 + (93,486) 0 + (94,637) 0 + (94,722) 0 + (96,564) 0 + (99,326) 0 + (100,281) 0 + (102,609) 0 + (104,644) 0 + (106,652) 0 + (107,239) 0 + (107,522) 0 + (108,131) 0 + (109,884) 0 + (110,402) 0 + (111,905) 0 + (112,127) 0 + (112,779) 0 + (113,278) 0 + (114,519) 0 + (115,240) 0 + (117,219) 0 + (117,338) 0 + (118,99) 0 + (120,477) 0 + (121,554) 0 + (121,715) 0 + (122,151) 0 + (125,177) 0 + (128,820) 0 + (129,660) 0 + (130,623) 0 + (131,253) 0 + (131,355) 0 + (133,492) 0 + (134,821) 0 + (135,295) 0 + (136,108) 0 + (137,834) 0 + (138,288) 0 + (139,284) 0 + (139,945) 0 + (141,199) 0 + (142,87) 0 + (142,225) 0 + (143,123) 0 + (144,574) 0 + (146,194) 0 + (148,357) 0 + (149,949) 0 + (150,717) 0 + (151,484) 0 + (156,290) 0 + (157,714) 0 + (157,974) 0 + (160,297) 0 + (162,601) 0 + (163,816) 0 + (164,221) 0 + (165,396) 0 + (166,801) 0 + (167,879) 0 + (168,321) 0 + (169,901) 0 + (172,951) 0 + (176,108) 0 + (176,188) 0 + (176,614) 0 + (176,781) 0 + (178,631) 0 + (179,932) 0 + (180,830) 0 + (182,675) 0 + (182,1001) 0 + (183,692) 0 + (184,143) 0 + (185,450) 0 + (186,779) 0 + (187,997) 0 + (188,357) 0 + (189,111) 0 
+ (190,990) 0 + (192,644) 0 + (192,953) 0 + (193,135) 0 + (194,137) 0 + (195,922) 0 + (197,859) 0 + (198,910) 0 + (199,531) 0 + (201,907) 0 + (202,863) 0 + (203,865) 0 + (204,614) 0 + (207,826) 0 + (208,985) 0 + (209,808) 0 + (211,71) 0 + (211,931) 0 + (212,426) 0 + (213,152) 0 + (214,928) 0 + (215,268) 0 + (216,550) 0 + (217,921) 0 + (218,704) 0 + (218,922) 0 + (219,66) 0 + (220,704) 0 + (221,56) 0 + (221,551) 0 + (222,545) 0 + (223,1016) 0 + (224,721) 0 + (225,935) 0 + (226,727) 0 + (228,743) 0 + (229,535) 0 + (231,551) 0 + (232,897) 0 + (234,520) 0 + (235,522) 0 + (236,221) 0 + (237,755) 0 + (238,964) 0 + (239,82) 0 + (240,388) 0 + (241,500) 0 + (242,124) 0 + (242,193) 0 + (243,300) 0 + (244,588) 0 + (244,1004) 0 + (245,494) 0 + (246,326) 0 + (247,115) 0 + (247,147) 0 + (248,233) 0 + (250,485) 0 + (251,708) 0 + (252,197) 0 + (253,485) 0 + (254,40) 0 + (254,238) 0 + (255,895) 0 + (256,114) 0 + (257,461) 0 + (257,796) 0 + (258,233) 0 + (260,884) 0 + (261,945) 0 + (262,368) 0 + (264,755) 0 + (265,124) 0 + (266,352) 0 + (267,10) 0 + (268,234) 0 + (269,400) 0 + (270,877) 0 + (270,924) 0 + (271,944) 0 + (272,67) 0 + (273,100) 0 + (274,979) 0 + (276,333) 0 + (277,377) 0 + (279,877) 0 + (280,18) 0 + (281,449) 0 + (282,179) 0 + (283,1007) 0 + (285,32) 0 + (286,37) 0 + (287,394) 0 + (288,848) 0 + (290,317) 0 + (291,594) 0 + (294,466) 0 + (294,960) 0 + (295,1) 0 + (295,106) 0 + (296,109) 0 + (296,183) 0 + (296,245) 0 + (297,912) 0 + (299,159) 0 + (300,554) 0 + (301,774) 0 + (302,30) 0 + (303,645) 0 + (304,229) 0 + (305,622) 0 + (307,264) 0 + (308,28) 0 + (309,328) 0 + (309,627) 0 + (310,357) 0 + (311,355) 0 + (312,61) 0 + (314,571) 0 + (315,177) 0 + (315,741) 0 + (316,177) 0 + (316,308) 0 + (320,468) 0 + (321,73) 0 + (322,235) 0 + (323,375) 0 + (323,651) 0 + (324,549) 0 + (325,306) 0 + (325,487) 0 + (326,649) 0 + (327,704) 0 + (329,176) 0 + (330,848) 0 + (330,965) 0 + (332,795) 0 + (334,695) 0 + (336,808) 0 + (337,608) 0 + (338,993) 0 + (339,680) 0 + (340,849) 0 + 
(342,723) 0 + (343,678) 0 + (344,384) 0 + (344,680) 0 + (345,75) 0 + (347,996) 0 + (348,60) 0 + (348,821) 0 + (350,282) 0 + (352,160) 0 + (353,536) 0 + (355,352) 0 + (356,340) 0 + (358,678) 0 + (360,679) 0 + (361,794) 0 + (361,989) 0 + (362,816) 0 + (363,206) 0 + (364,629) 0 + (365,990) 0 + (366,841) 0 + (366,971) 0 + (367,888) 0 + (368,587) 0 + (369,684) 0 + (370,270) 0 + (372,471) 0 + (373,88) 0 + (375,992) 0 + (376,336) 0 + (377,86) 0 + (378,882) 0 + (379,592) 0 + (380,77) 0 + (380,643) 0 + (381,1012) 0 + (382,816) 0 + (383,711) 0 + (385,670) 0 + (386,537) 0 + (387,347) 0 + (388,494) 0 + (389,328) 0 + (390,551) 0 + (391,59) 0 + (391,600) 0 + (394,692) 0 + (396,645) 0 + (398,107) 0 + (398,246) 0 + (399,436) 0 + (400,172) 0 + (401,790) 0 + (402,320) 0 + (403,40) 0 + (404,641) 0 + (405,49) 0 + (405,475) 0 + (407,320) 0 + (408,61) 0 + (410,754) 0 + (411,643) 0 + (412,949) 0 + (413,94) 0 + (415,26) 0 + (416,575) 0 + (417,366) 0 + (418,160) 0 + (419,209) 0 + (421,614) 0 + (422,177) 0 + (423,873) 0 + (424,542) 0 + (425,263) 0 + (426,377) 0 + (427,149) 0 + (429,305) 0 + (430,718) 0 + (431,51) 0 + (432,857) 0 + (434,604) 0 + (435,152) 0 + (436,356) 0 + (437,105) 0 + (440,338) 0 + (441,982) 0 + (442,880) 0 + (443,753) 0 + (446,741) 0 + (448,646) 0 + (448,744) 0 + (450,579) 0 + (451,147) 0 + (451,1017) 0 + (452,868) 0 + (453,26) 0 + (454,415) 0 + (454,668) 0 + (455,43) 0 + (456,849) 0 + (456,985) 0 + (457,218) 0 + (458,510) 0 + (459,737) 0 + (460,836) 0 + (461,849) 0 + (461,917) 0 + (462,900) 0 + (463,316) 0 + (464,762) 0 + (465,355) 0 + (465,801) 0 + (466,673) 0 + (468,288) 0 + (470,889) 0 + (471,650) 0 + (473,121) 0 + (473,127) 0 + (474,487) 0 + (476,44) 0 + (477,342) 0 + (480,667) 0 + (481,558) 0 + (482,680) 0 + (483,517) 0 + (484,961) 0 + (485,274) 0 + (486,1015) 0 + (487,194) 0 + (489,802) 0 + (490,811) 0 + (491,319) 0 + (492,377) 0 + (494,432) 0 + (495,809) 0 + (496,267) 0 + (496,902) 0 + (498,194) 0 + (500,84) 0 + (501,704) 0 + (503,519) 0 + (504,510) 0 + (505,574) 
0 + (507,643) 0 + (508,449) 0 + (512,892) 0 + (513,271) 0 + (517,369) 0 + (518,293) 0 + (520,270) 0 + (521,1013) 0 + (522,284) 0 + (524,945) 0 + (525,94) 0 + (525,362) 0 + (526,52) 0 + (527,61) 0 + (529,998) 0 + (531,908) 0 + (533,674) 0 + (535,660) 0 + (535,776) 0 + (536,500) 0 + (537,799) 0 + (538,492) 0 + (538,861) 0 + (540,245) 0 + (542,137) 0 + (545,658) 0 + (546,213) 0 + (547,767) 0 + (547,912) 0 + (547,1018) 0 + (548,46) 0 + (548,697) 0 + (549,602) 0 + (550,927) 0 + (553,391) 0 + (554,351) 0 + (555,10) 0 + (556,26) 0 + (557,910) 0 + (560,792) 0 + (562,182) 0 + (562,862) 0 + (563,877) 0 + (564,310) 0 + (564,609) 0 + (565,490) 0 + (566,564) 0 + (566,607) 0 + (569,872) 0 + (570,465) 0 + (571,271) 0 + (571,919) 0 + (574,603) 0 + (576,256) 0 + (579,274) 0 + (580,182) 0 + (581,445) 0 + (582,177) 0 + (583,118) 0 + (584,399) 0 + (585,433) 0 + (587,254) 0 + (588,914) 0 + (589,1016) 0 + (590,95) 0 + (590,802) 0 + (592,527) 0 + (593,143) 0 + (594,430) 0 + (595,787) 0 + (598,788) 0 + (599,127) 0 + (601,478) 0 + (602,218) 0 + (603,759) 0 + (604,270) 0 + (605,76) 0 + (606,930) 0 + (608,832) 0 + (609,287) 0 + (610,794) 0 + (611,759) 0 + (613,398) 0 + (614,386) 0 + (615,115) 0 + (616,928) 0 + (617,30) 0 + (618,361) 0 + (619,996) 0 + (620,5) 0 + (621,41) 0 + (623,44) 0 + (624,19) 0 + (624,242) 0 + (624,524) 0 + (626,51) 0 + (627,361) 0 + (628,396) 0 + (629,882) 0 + (630,341) 0 + (631,49) 0 + (631,585) 0 + (632,73) 0 + (634,912) 0 + (635,882) 0 + (636,617) 0 + (637,716) 0 + (638,113) 0 + (639,616) 0 + (640,837) 0 + (641,457) 0 + (643,934) 0 + (647,783) 0 + (648,195) 0 + (649,614) 0 + (650,957) 0 + (651,281) 0 + (652,973) 0 + (653,60) 0 + (653,333) 0 + (654,605) 0 + (655,910) 0 + (656,349) 0 + (660,591) 0 + (661,512) 0 + (663,767) 0 + (665,77) 0 + (666,503) 0 + (667,951) 0 + (668,365) 0 + (669,300) 0 + (671,141) 0 + (671,565) 0 + (672,819) 0 + (674,819) 0 + (675,454) 0 + (676,242) 0 + (677,289) 0 + (678,802) 0 + (680,398) 0 + (681,390) 0 + (682,117) 0 + (683,110) 0 + (684,907) 
0 + (686,202) 0 + (687,45) 0 + (688,287) 0 + (689,502) 0 + (690,299) 0 + (691,392) 0 + (692,600) 0 + (694,378) 0 + (695,702) 0 + (696,102) 0 + (698,631) 0 + (699,152) 0 + (700,840) 0 + (702,777) 0 + (703,132) 0 + (704,374) 0 + (705,579) 0 + (706,511) 0 + (707,76) 0 + (708,259) 0 + (708,925) 0 + (709,872) 0 + (709,873) 0 + (710,107) 0 + (710,293) 0 + (711,462) 0 + (714,475) 0 + (715,172) 0 + (715,751) 0 + (716,697) 0 + (717,234) 0 + (718,848) 0 + (719,331) 0 + (720,201) 0 + (720,725) 0 + (722,415) 0 + (722,934) 0 + (723,675) 0 + (724,480) 0 + (727,177) 0 + (728,797) 0 + (729,884) 0 + (730,767) 0 + (731,275) 0 + (732,910) 0 + (733,763) 0 + (734,574) 0 + (735,268) 0 + (736,115) 0 + (737,912) 0 + (738,1023) 0 + (739,335) 0 + (740,596) 0 + (741,365) 0 + (742,485) 0 + (743,186) 0 + (745,645) 0 + (746,273) 0 + (747,91) 0 + (748,886) 0 + (749,59) 0 + (749,755) 0 + (751,348) 0 + (752,313) 0 + (752,742) 0 + (752,745) 0 + (753,472) 0 + (753,592) 0 + (754,1007) 0 + (756,633) 0 + (758,847) 0 + (759,500) 0 + (760,340) 0 + (760,381) 0 + (762,962) 0 + (763,954) 0 + (764,392) 0 + (764,913) 0 + (766,915) 0 + (766,936) 0 + (767,372) 0 + (768,307) 0 + (770,458) 0 + (771,487) 0 + (773,56) 0 + (774,773) 0 + (775,115) 0 + (776,537) 0 + (777,392) 0 + (778,893) 0 + (779,644) 0 + (780,256) 0 + (782,399) 0 + (782,892) 0 + (783,614) 0 + (785,816) 0 + (786,462) 0 + (787,876) 0 + (788,273) 0 + (789,696) 0 + (790,471) 0 + (791,793) 0 + (792,636) 0 + (792,955) 0 + (793,809) 0 + (794,986) 0 + (795,656) 0 + (796,347) 0 + (797,880) 0 + (798,802) 0 + (801,130) 0 + (803,896) 0 + (804,1022) 0 + (805,32) 0 + (805,479) 0 + (806,889) 0 + (807,504) 0 + (809,719) 0 + (809,737) 0 + (810,646) 0 + (812,375) 0 + (813,200) 0 + (815,408) 0 + (816,902) 0 + (817,430) 0 + (818,985) 0 + (819,688) 0 + (821,839) 0 + (822,747) 0 + (823,39) 0 + (824,886) 0 + (825,406) 0 + (828,407) 0 + (829,511) 0 + (830,915) 0 + (831,982) 0 + (832,1003) 0 + (833,362) 0 + (833,999) 0 + (834,136) 0 + (834,295) 0 + (835,115) 0 + (836,218) 
0 + (837,565) 0 + (839,541) 0 + (839,711) 0 + (840,159) 0 + (841,636) 0 + (842,136) 0 + (843,524) 0 + (844,114) 0 + (846,533) 0 + (847,741) 0 + (848,483) 0 + (849,464) 0 + (850,302) 0 + (851,567) 0 + (852,150) 0 + (852,529) 0 + (853,623) 0 + (855,106) 0 + (856,1014) 0 + (857,151) 0 + (857,650) 0 + (858,781) 0 + (858,994) 0 + (859,508) 0 + (859,716) 0 + (862,636) 0 + (863,21) 0 + (864,1022) 0 + (866,97) 0 + (867,48) 0 + (868,303) 0 + (869,364) 0 + (871,453) 0 + (873,173) 0 + (874,485) 0 + (875,168) 0 + (876,357) 0 + (877,722) 0 + (877,990) 0 + (880,176) 0 + (881,23) 0 + (882,608) 0 + (884,643) 0 + (885,687) 0 + (887,487) 0 + (888,110) 0 + (888,943) 0 + (889,892) 0 + (890,628) 0 + (891,679) 0 + (892,653) 0 + (894,33) 0 + (895,37) 0 + (895,695) 0 + (896,390) 0 + (897,42) 0 + (900,687) 0 + (901,605) 0 + (902,57) 0 + (903,1021) 0 + (904,808) 0 + (905,795) 0 + (906,479) 0 + (907,674) 0 + (909,456) 0 + (911,548) 0 + (914,924) 0 + (915,366) 0 + (915,502) 0 + (916,420) 0 + (916,823) 0 + (918,480) 0 + (920,608) 0 + (925,685) 0 + (926,755) 0 + (929,538) 0 + (930,13) 0 + (931,479) 0 + (933,860) 0 + (934,165) 0 + (935,351) 0 + (936,399) 0 + (938,215) 0 + (939,496) 0 + (940,414) 0 + (941,586) 0 + (942,356) 0 + (943,31) 0 + (943,538) 0 + (944,109) 0 + (945,671) 0 + (946,246) 0 + (947,182) 0 + (948,628) 0 + (949,316) 0 + (950,1017) 0 + (951,221) 0 + (955,457) 0 + (955,823) 0 + (956,653) 0 + (957,656) 0 + (958,644) 0 + (959,667) 0 + (960,78) 0 + (961,828) 0 + (962,877) 0 + (963,397) 0 + (964,370) 0 + (965,504) 0 + (966,483) 0 + (967,1023) 0 + (968,400) 0 + (969,564) 0 + (970,856) 0 + (971,875) 0 + (972,549) 0 + (974,934) 0 + (977,347) 0 + (978,123) 0 + (981,175) 0 + (983,58) 0 + (984,449) 0 + (984,582) 0 + (985,72) 0 + (985,743) 0 + (987,120) 0 + (987,340) 0 + (988,172) 0 + (989,585) 0 + (991,660) 0 + (992,531) 0 + (993,87) 0 + (993,674) 0 + (994,992) 0 + (995,170) 0 + (997,946) 0 + (998,678) 0 + (1001,877) 0 + (1002,286) 0 + (1004,250) 0 + (1006,1022) 0 + (1008,159) 0 + (1009,574) 
0 + (1012,533) 0 + (1013,574) 0 + (1014,667) 0 + (1015,127) 0 + (1015,613) 0 + (1016,457) 0 + (1017,180) 0 + (1018,254) 0 + (1019,287) 0 + (1020,67) 0 + (1020,151) 0 + (1021,810) 0 + (1022,491) 0 + (1023,840) 0 + + + 1024x1024 GraphBLAS bool matrix, sparse by row + T actual, 893 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + (25,241) 1 + (26,428) 1 + ... + work:893 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 8192 bytes +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 4096 bytes +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes + rmm_wrap_alloc 8192 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 10240 values, invsparse = 103 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +10240 nonzeroes left to fill.. +4633 nonzeroes left to fill.. + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 131072 bytes + rmm_wrap_alloc 256 bytes +1024 slots to fill +all pairs to bucket 6, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff534e400 +inside enumify: 0x7f1ff534e400 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 
0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 6, no filling +done assigning buckets +bucket 6 has 1024 dots to do +LAUNCHING BUCKET CODE: 6 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vssp +found memory-cached prog GB_jit_AxB_dot3_phase3_vssp + got kernel instance AxB_dot3_phase3_vssp_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vssp_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsspIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<32,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.908288ms + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1001 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 23 entries, memory: 16.6 KB + + (42,324) 0 + (73,665) 1 + (106,652) 0 + (138,288) 1 + (242,124) 1 + (295,1) 0 + (300,554) 1 + (312,61) 0 + (344,384) 0 + (496,267) 0 + (587,254) 1 + (686,202) 0 + (708,925) 1 + (715,751) 0 + (729,884) 0 + (741,365) 1 + (751,348) 1 + (792,636) 0 + (857,151) 0 + (876,357) 0 + (940,414) 0 + (945,671) 0 + (968,400) 1 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 23 entries, memory: 1.1 KB + + (42,324) 0 + (73,665) 1 + (106,652) 0 + (138,288) 1 + (242,124) 1 + (295,1) 0 + (300,554) 1 + (312,61) 0 + (344,384) 0 + (496,267) 0 + (587,254) 1 + (686,202) 0 + (708,925) 1 + (715,751) 0 + (729,884) 0 + (741,365) 1 + (751,348) 1 + (792,636) 0 + (857,151) 0 + (876,357) 0 + (940,414) 0 + (945,671) 0 + (968,400) 1 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 23 entries, memory: 1.2 KB + + (42,324) 0 + 
(73,665) 0 + (106,652) 0 + (138,288) 0 + (242,124) 0 + (295,1) 0 + (300,554) 0 + (312,61) 0 + (344,384) 0 + (496,267) 0 + (587,254) 0 + (686,202) 0 + (708,925) 0 + (715,751) 0 + (729,884) 0 + (741,365) 0 + (751,348) 0 + (792,636) 0 + (857,151) 0 + (876,357) 0 + (940,414) 0 + (945,671) 0 + (968,400) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 23 entries, memory: 1.0 KB + + (42,324) 1 + (73,665) 1 + (106,652) 1 + (138,288) 1 + (242,124) 1 + (295,1) 1 + (300,554) 1 + (312,61) 1 + (344,384) 1 + (496,267) 1 + (587,254) 1 + (686,202) 1 + (708,925) 1 + (715,751) 1 + (729,884) 1 + (741,365) 1 + (751,348) 1 + (792,636) 1 + (857,151) 1 + (876,357) 1 + (940,414) 1 + (945,671) 1 + (968,400) 1 + work:23 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 16384 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 4096 values, invsparse = 256 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +4096 nonzeroes left to fill.. +1491 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 32768 bytes +1024 slots to fill +all pairs to bucket 7, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5382700 +inside enumify: 0x7f1ff5382700 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 7, no filling +done assigning buckets +bucket 7 has 1024 dots to do +LAUNCHING BUCKET CODE: 7 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<2,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.616448ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1010 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 14 entries, memory: 16.4 KB + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 14 entries, memory: 704 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 14 entries, memory: 768 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 0 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 0 + (891,679) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 14 entries, memory: 656 bytes + + (99,326) 1 + 
(115,240) 1 + (176,614) 1 + (180,830) 1 + (343,678) 1 + (398,246) 1 + (411,643) 1 + (557,910) 1 + (590,95) 1 + (601,478) 1 + (623,44) 1 + (729,884) 1 + (825,406) 1 + (891,679) 1 + work:14 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 4096 values, invsparse = 256 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +4096 nonzeroes left to fill.. +1491 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes +1024 slots to fill +all pairs to bucket 8, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5382800 +inside enumify: 0x7f1ff5382800 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 8, no filling +done assigning buckets +bucket 8 has 1024 dots to do +LAUNCHING BUCKET CODE: 8 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<2,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.801792ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1010 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 14 entries, memory: 16.4 KB + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 14 entries, memory: 704 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 14 entries, memory: 768 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 0 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 0 + (891,679) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 14 entries, memory: 656 bytes + + (99,326) 1 + (115,240) 1 + (176,614) 1 + (180,830) 1 + (343,678) 1 + (398,246) 1 + (411,643) 1 + (557,910) 1 + (590,95) 1 + 
(601,478) 1 + (623,44) 1 + (729,884) 1 + (825,406) 1 + (891,679) 1 + work:14 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 4096 values, invsparse = 256 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +4096 nonzeroes left to fill.. +1491 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes +1024 slots to fill +all pairs to bucket 9, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5383200 +inside enumify: 0x7f1ff5383200 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 9, no filling +done assigning buckets +bucket 9 has 1024 dots to do +LAUNCHING BUCKET CODE: 9 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<2,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.820224ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1010 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 14 entries, memory: 16.4 KB + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 14 entries, memory: 704 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 14 entries, memory: 768 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 0 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 0 + (891,679) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 14 entries, memory: 656 bytes + + (99,326) 1 + (115,240) 1 + (176,614) 1 + (180,830) 1 + (343,678) 1 + (398,246) 1 + (411,643) 1 + (557,910) 1 + (590,95) 1 + 
(601,478) 1 + (623,44) 1 + (729,884) 1 + (825,406) 1 + (891,679) 1 + work:14 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 4096 values, invsparse = 256 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +4096 nonzeroes left to fill.. +1491 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes +1024 slots to fill +all pairs to bucket 10, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5383b00 +inside enumify: 0x7f1ff5383b00 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 10, no filling +done assigning buckets +bucket 10 has 1024 dots to do +LAUNCHING BUCKET CODE: 10 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<2,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 1.00762ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1010 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 14 entries, memory: 16.4 KB + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 14 entries, memory: 704 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 14 entries, memory: 768 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 0 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 0 + (891,679) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 14 entries, memory: 656 bytes + + (99,326) 1 + (115,240) 1 + (176,614) 1 + (180,830) 1 + (343,678) 1 + (398,246) 1 + (411,643) 1 + (557,910) 1 + (590,95) 1 + 
(601,478) 1 + (623,44) 1 + (729,884) 1 + (825,406) 1 + (891,679) 1 + work:14 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 5120 values, invsparse = 205 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +5120 nonzeroes left to fill.. +2091 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +569 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes +1024 slots to fill +all pairs to bucket 11, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff539d500 +inside enumify: 0x7f1ff539d500 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b57180 +GB_enumify_mask gets mcode: 1 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 4 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 4, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434374399488 +done enumify semiring +scode=397409434374399488 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 11, no filling +done assigning buckets +bucket 11 has 1024 dots to do +LAUNCHING BUCKET CODE: 11 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434374399488.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_mp +found memory-cached prog GB_jit_AxB_dot3_phase3_mp + got kernel instance AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +Launching _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<32,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +warp 27 zombie count = 32, nzombies = 0 +warp 11 zombie count = 32, nzombies = 0 +warp 21 zombie count = 32, nzombies = 0 +warp 3 zombie count = 32, nzombies = 0 +warp 6 zombie count = 32, nzombies = 0 +warp 2 zombie count = 32, nzombies = 0 +warp 14 zombie count = 32, nzombies = 0 +warp 7 zombie count = 31, nzombies = 0 +warp 18 zombie count = 32, nzombies = 0 +warp 5 zombie count = 32, nzombies = 0 +warp 10 zombie count = 32, nzombies = 0 +warp 1 zombie count = 32, nzombies = 0 +warp 24 zombie count = 32, nzombies = 0 +warp 15 zombie count = 32, nzombies = 0 +warp 20 zombie count = 32, nzombies = 0 +warp 28 zombie count = 31, nzombies = 0 +warp 4 zombie count = 31, nzombies = 0 +warp 25 zombie count = 32, nzombies = 0 +warp 17 zombie count = 32, nzombies = 0 +warp 12 zombie count = 32, nzombies = 0 +warp 31 zombie count = 32, nzombies = 0 +warp 16 zombie count = 32, nzombies = 0 +warp 30 zombie count = 32, nzombies = 0 +warp 22 zombie count = 31, nzombies = 0 +warp 0 zombie count = 32, nzombies = 0 +warp 23 zombie count = 32, nzombies = 0 +warp 26 zombie count = 31, nzombies = 0 +warp 9 zombie count = 31, nzombies = 0 + Czombie = 32 + Czombie = 96 + Czombie = 96 + Czombie = 128 + Czombie = 160 + Czombie = 192 +warp 29 zombie count = 32, nzombies = 128 + Czombie = 224 +warp 13 zombie count = 31, nzombies = 160 + Czombie = 255 + 
Czombie = 287 + Czombie = 383 + Czombie = 383 + Czombie = 383 +warp 19 zombie count = 32, nzombies = 319 + Czombie = 479 + Czombie = 479 + Czombie = 479 + Czombie = 510 + Czombie = 573 + Czombie = 573 + Czombie = 669 + Czombie = 669 + Czombie = 669 + Czombie = 701 + Czombie = 764 + Czombie = 764 + Czombie = 796 + Czombie = 859 + Czombie = 859 + Czombie = 890 + Czombie = 953 + Czombie = 953 + Czombie = 985 +warp 8 zombie count = 31, nzombies = 985 + Czombie = 1016 +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 1.59027ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1016 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS bool matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 16.3 KB + + (235,522) 1 + (309,328) 1 + (417,366) 0 + (565,490) 0 + (611,759) 0 + (714,475) 1 + (766,915) 0 + (877,722) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 8 entries, memory: 544 bytes + + (235,522) 1 + (309,328) 1 + (417,366) 0 + (565,490) 0 + (611,759) 0 + (714,475) 1 + (766,915) 0 + (877,722) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 8 entries, memory: 576 bytes + + (235,522) 0 + (309,328) 0 + (417,366) 0 + (565,490) 0 + (611,759) 0 + (714,475) 0 + (766,915) 0 + (877,722) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 8 entries, memory: 520 bytes + + (235,522) 1 + (309,328) 1 + (417,366) 1 + (565,490) 1 + (611,759) 1 + (714,475) 1 + (766,915) 1 + (877,722) 1 + work:8 gpus:0 [ OK ] AxB_dot3_tests_PLUS_TIMES_3.smallxsmallPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t (475 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_3.tinyxtinyPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t +Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 512 bytes +32 slots to fill +all pairs to bucket 1, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5384400 +inside enumify: 0x7f1ff5384400 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify 
monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 1, no filling +done assigning buckets +bucket 1 has 32 dots to do +LAUNCHING BUCKET CODE: 1 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_dndn +found memory-cached prog GB_jit_AxB_dot3_phase3_dndn + got kernel instance AxB_dot3_phase3_dndn_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_dndn_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_dndnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +tid=0, i,j = 6,0 nnzA= 32, nnzB=32 +tid=0, i,j = 1,1 nnzA= 32, nnzB=32 +tid=0, i,j = 12,3 nnzA= 32, nnzB=32 +tid=0, i,j = 17,3 nnzA= 32, nnzB=32 +tid=0, i,j = 19,4 nnzA= 32, nnzB=32 +tid=0, i,j = 19,5 nnzA= 32, nnzB=32 +tid=0, i,j = 22,6 nnzA= 32, nnzB=32 +tid=0, i,j = 24,6 nnzA= 32, nnzB=32 +tid=0, i,j = 10,8 nnzA= 32, nnzB=32 +tid=0, i,j = 19,9 nnzA= 32, nnzB=32 +tid=0, i,j = 31,9 nnzA= 32, nnzB=32 +tid=0, i,j = 13,11 nnzA= 32, nnzB=32 +tid=0, i,j = 11,12 nnzA= 32, nnzB=32 
+tid=0, i,j = 24,14 nnzA= 32, nnzB=32 +tid=0, i,j = 30,15 nnzA= 32, nnzB=32 +tid=0, i,j = 20,16 nnzA= 32, nnzB=32 +tid=0, i,j = 30,17 nnzA= 32, nnzB=32 +tid=0, i,j = 18,18 nnzA= 32, nnzB=32 +tid=0, i,j = 1,19 nnzA= 32, nnzB=32 +tid=0, i,j = 25,20 nnzA= 32, nnzB=32 +tid=0, i,j = 24,21 nnzA= 32, nnzB=32 +tid=0, i,j = 27,21 nnzA= 32, nnzB=32 +tid=0, i,j = 30,22 nnzA= 32, nnzB=32 +tid=0, i,j = 30,23 nnzA= 32, nnzB=32 +tid=0, i,j = 14,24 nnzA= 32, nnzB=32 +tid=0, i,j = 4,25 nnzA= 32, nnzB=32 +tid=0, i,j = 15,26 nnzA= 32, nnzB=32 +tid=0, i,j = 28,27 nnzA= 32, nnzB=32 +tid=0, i,j = 16,28 nnzA= 32, nnzB=32 +tid=0, i,j = 9,29 nnzA= 32, nnzB=32 +tid=0, i,j = 24,30 nnzA= 32, nnzB=32 +tid=0, i,j = 31,31 nnzA= 32, nnzB=32 +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 3.29933ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + + (0,6) 11 + (1,1) 10 + (3,12) 7 + (3,17) 6 + (4,19) 8 + (5,19) 10 + (6,22) 6 + (6,24) 9 + (8,10) 7 + (9,19) 8 + (9,31) 6 + (11,13) 8 + (12,11) 6 + (14,24) 10 + (15,30) 9 + (16,20) 5 + (17,30) 7 + (18,18) 12 + (19,1) 6 + (20,25) 7 + (21,24) 9 + (21,27) 6 + (22,30) 8 + (23,30) 11 + (24,14) 7 + (25,4) 9 + (26,15) 4 + (27,28) 5 + (28,16) 4 + ... +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.1 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 0 + (4,19) 1 + (5,19) 1 + (6,22) 0 + (6,24) 1 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) 1 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... 
+ + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + + (0,6) 11 + (1,1) 10 + (3,12) 7 + (3,17) 6 + (4,19) 8 + (5,19) 10 + (6,22) 6 + (6,24) 9 + (8,10) 7 + (9,19) 8 + (9,31) 6 + (11,13) 8 + (12,11) 6 + (14,24) 10 + (15,30) 9 + (16,20) 5 + (17,30) 7 + (18,18) 12 + (19,1) 6 + (20,25) 7 + (21,24) 9 + (21,27) 6 + (22,30) 8 + (23,30) 11 + (24,14) 7 + (25,4) 9 + (26,15) 4 + (27,28) 5 + (28,16) 4 + (29,9) 7 + (30,24) 10 + (31,31) 10 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 32 entries, memory: 1.5 KB + + (0,6) 11 + (1,1) 10 + (3,12) 7 + (3,17) 6 + (4,19) 8 + (5,19) 10 + (6,22) 6 + (6,24) 9 + (8,10) 7 + (9,19) 8 + (9,31) 6 + (11,13) 8 + (12,11) 6 + (14,24) 10 + (15,30) 9 + (16,20) 5 + (17,30) 7 + (18,18) 12 + (19,1) 6 + (20,25) 7 + (21,24) 9 + (21,27) 6 + (22,30) 8 + (23,30) 11 + (24,14) 7 + (25,4) 9 + (26,15) 4 + (27,28) 5 + (28,16) 4 + (29,9) 7 + (30,24) 10 + (31,31) 10 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 32 entries, memory: 1.2 KB + + (0,6) 0 + (1,1) 0 + (3,12) 0 + (3,17) 0 + (4,19) 0 + (5,19) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 0 + (14,24) 0 + (15,30) 0 + (16,20) 0 + (17,30) 0 + (18,18) 0 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 0 + (24,14) 0 + (25,4) 0 + (26,15) 0 + (27,28) 0 + (28,16) 0 + (29,9) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 32 entries, memory: 1.0 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 1 + (4,19) 1 + (5,19) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,19) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (18,18) 1 + (19,1) 1 + (20,25) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... 
+ work:32 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 1024 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 160 values, invsparse = 7 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +160 nonzeroes left to fill.. +62 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 5, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff539d800 +inside enumify: 0x7f1ff539d800 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 5, no filling +done assigning buckets +bucket 5 has 32 dots to do +LAUNCHING BUCKET CODE: 5 +Confiring spdnINside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_spdn +found memory-cached prog GB_jit_AxB_dot3_phase3_spdn + got kernel instance AxB_dot3_phase3_spdn_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_spdn_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_spdnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.500736ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 6 + + (0,6) zombie + (1,1) 2 + (3,12) 2 + (3,17) 2 + (4,19) zombie + (5,19) zombie + (6,22) 2 + (6,24) 0 + (8,10) 0 + (9,19) zombie + (9,31) 4 + (11,13) 3 + (12,11) 2 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) zombie + (19,1) 3 + (20,25) zombie + (21,24) 1 + (21,27) 0 + (22,30) 1 + (23,30) 0 + (24,14) 1 + (25,4) 0 + (26,15) 1 + (27,28) 2 + (28,16) 1 + ... +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.1 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 0 + (4,19) 1 + (5,19) 1 + (6,22) 0 + (6,24) 1 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) 1 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... 
+ + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 26 entries, memory: 1.1 KB + + (1,1) 2 + (3,12) 2 + (3,17) 2 + (6,22) 2 + (6,24) 0 + (8,10) 0 + (9,31) 4 + (11,13) 3 + (12,11) 2 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (19,1) 3 + (21,24) 1 + (21,27) 0 + (22,30) 1 + (23,30) 0 + (24,14) 1 + (25,4) 0 + (26,15) 1 + (27,28) 2 + (28,16) 1 + (29,9) 0 + (30,24) 1 + (31,31) 2 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 26 entries, memory: 1.1 KB + + (1,1) 2 + (3,12) 2 + (3,17) 2 + (6,22) 2 + (6,24) 0 + (8,10) 0 + (9,31) 4 + (11,13) 3 + (12,11) 2 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (19,1) 3 + (21,24) 1 + (21,27) 0 + (22,30) 1 + (23,30) 0 + (24,14) 1 + (25,4) 0 + (26,15) 1 + (27,28) 2 + (28,16) 1 + (29,9) 0 + (30,24) 1 + (31,31) 2 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 26 entries, memory: 1.2 KB + + (1,1) 0 + (3,12) 0 + (3,17) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,31) 0 + (11,13) 0 + (12,11) 0 + (14,24) 0 + (15,30) 0 + (16,20) 0 + (17,30) 0 + (19,1) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 0 + (24,14) 0 + (25,4) 0 + (26,15) 0 + (27,28) 0 + (28,16) 0 + (29,9) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 26 entries, memory: 1.0 KB + + (1,1) 1 + (3,12) 1 + (3,17) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,31) 1 + (11,13) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 1 + (17,30) 1 + (19,1) 1 + (21,24) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + (29,9) 1 + (30,24) 1 + (31,31) 1 + work:26 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 320 values, invsparse = 4 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +320 nonzeroes left to fill.. +140 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 6, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff539f100 +inside enumify: 0x7f1ff539f100 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done 
enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 6, no filling +done assigning buckets +bucket 6 has 32 dots to do +LAUNCHING BUCKET CODE: 6 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vssp +found memory-cached prog GB_jit_AxB_dot3_phase3_vssp + got kernel instance AxB_dot3_phase3_vssp_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vssp_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsspIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. 
+returned from kernel 0.628736ms + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 15 + + (0,6) zombie + (1,1) 1 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,19) zombie + (9,31) 1 + (11,13) zombie + (12,11) 0 + (14,24) 1 + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 1 + (20,25) zombie + (21,24) zombie + (21,27) 1 + (22,30) 0 + (23,30) 0 + (24,14) zombie + (25,4) zombie + (26,15) 0 + (27,28) zombie + (28,16) 0 + ... +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.1 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 0 + (4,19) 1 + (5,19) 1 + (6,22) 0 + (6,24) 1 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) 1 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... 
+ + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 17 entries, memory: 1.1 KB + + (1,1) 1 + (3,17) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,31) 1 + (12,11) 0 + (14,24) 1 + (15,30) 1 + (19,1) 1 + (21,27) 1 + (22,30) 0 + (23,30) 0 + (26,15) 0 + (28,16) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 17 entries, memory: 1.1 KB + + (1,1) 1 + (3,17) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,31) 1 + (12,11) 0 + (14,24) 1 + (15,30) 1 + (19,1) 1 + (21,27) 1 + (22,30) 0 + (23,30) 0 + (26,15) 0 + (28,16) 0 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 17 entries, memory: 1.2 KB + + (1,1) 0 + (3,17) 0 + (6,22) 0 + (6,24) 0 + (8,10) 0 + (9,31) 0 + (12,11) 0 + (14,24) 0 + (15,30) 0 + (19,1) 0 + (21,27) 0 + (22,30) 0 + (23,30) 0 + (26,15) 0 + (28,16) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 17 entries, memory: 1.0 KB + + (1,1) 1 + (3,17) 1 + (6,22) 1 + (6,24) 1 + (8,10) 1 + (9,31) 1 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (19,1) 1 + (21,27) 1 + (22,30) 1 + (23,30) 1 + (26,15) 1 + (28,16) 1 + (30,24) 1 + (31,31) 1 + work:17 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 128 values, invsparse = 8 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +128 nonzeroes left to fill.. +43 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 7, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff539e300 +inside enumify: 0x7f1ff539e300 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, 
ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 7, no filling +done assigning buckets +bucket 7 has 32 dots to do +LAUNCHING BUCKET CODE: 7 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.480256ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 24 + + (0,6) zombie + (1,1) 0 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) zombie + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 0 + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.1 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 0 + (4,19) 1 + (5,19) 1 + (6,22) 0 + (6,24) 1 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) 1 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 8 entries, memory: 896 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 0 + (19,1) 0 + (25,4) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 8 entries, memory: 840 bytes + + (1,1) 1 + (3,17) 1 + (9,31) 1 + (15,30) 1 + (19,1) 1 + (25,4) 1 + (30,24) 1 + (31,31) 1 + work:8 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 128 values, invsparse = 8 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +128 nonzeroes left to fill.. +43 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 8, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff53e0600 +inside enumify: 0x7f1ff53e0600 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking 
enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 8, no filling +done assigning buckets +bucket 8 has 32 dots to do +LAUNCHING BUCKET CODE: 8 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. 
+returned from kernel 0.421888ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 24 + + (0,6) zombie + (1,1) 0 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) zombie + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 0 + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.1 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 0 + (4,19) 1 + (5,19) 1 + (6,22) 0 + (6,24) 1 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) 1 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... 
+ + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 8 entries, memory: 896 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 0 + (19,1) 0 + (25,4) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 8 entries, memory: 840 bytes + + (1,1) 1 + (3,17) 1 + (9,31) 1 + (15,30) 1 + (19,1) 1 + (25,4) 1 + (30,24) 1 + (31,31) 1 + work:8 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 128 values, invsparse = 8 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +128 nonzeroes left to fill.. 
+43 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 9, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff53ead00 +inside enumify: 0x7f1ff53ead00 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 9, no filling +done assigning buckets +bucket 9 has 32 dots to do +LAUNCHING BUCKET CODE: 9 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.551936ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 24 + + (0,6) zombie + (1,1) 0 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) zombie + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 0 + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.1 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 0 + (4,19) 1 + (5,19) 1 + (6,22) 0 + (6,24) 1 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) 1 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 8 entries, memory: 896 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 0 + (19,1) 0 + (25,4) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 8 entries, memory: 840 bytes + + (1,1) 1 + (3,17) 1 + (9,31) 1 + (15,30) 1 + (19,1) 1 + (25,4) 1 + (30,24) 1 + (31,31) 1 + work:8 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. 
+ rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +21 nonzeroes left to fill.. + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 128 values, invsparse = 8 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +128 nonzeroes left to fill.. +43 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 10, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff53ebe00 +inside enumify: 0x7f1ff53ebe00 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done 
invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 10, no filling +done assigning buckets +bucket 10 has 32 dots to do +LAUNCHING BUCKET CODE: 10 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. 
+returned from kernel 0.4096ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 24 + + (0,6) zombie + (1,1) 0 + (3,12) zombie + (3,17) 0 + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) zombie + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) 1 + (16,20) zombie + (17,30) zombie + (18,18) zombie + (19,1) 0 + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.1 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 0 + (4,19) 1 + (5,19) 1 + (6,22) 0 + (6,24) 1 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) 1 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... 
+ + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 8 entries, memory: 864 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 1 + (19,1) 0 + (25,4) 1 + (30,24) 0 + (31,31) 0 + + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 8 entries, memory: 896 bytes + + (1,1) 0 + (3,17) 0 + (9,31) 0 + (15,30) 0 + (19,1) 0 + (25,4) 0 + (30,24) 0 + (31,31) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 8 entries, memory: 840 bytes + + (1,1) 1 + (3,17) 1 + (9,31) 1 + (15,30) 1 + (19,1) 1 + (25,4) 1 + (30,24) 1 + (31,31) 1 + work:8 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 4567 +fill_random nrows=32ncols=32 need 32 values, invsparse = 32 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +32 nonzeroes left to fill.. +2 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 543210 +fill_random nrows=32ncols=32 need 160 values, invsparse = 7 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +160 nonzeroes left to fill.. +51 nonzeroes left to fill.. +inside fill, using seed 32 +fill_random nrows=32ncols=32 need 64 values, invsparse = 16 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +64 nonzeroes left to fill.. +20 nonzeroes left to fill.. 
+ rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 11, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff53ee300 +inside enumify: 0x7f1ff53ee300 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +32 slots to fill +all pairs to bucket 11, no filling +done assigning buckets +bucket 11 has 32 dots to do +LAUNCHING BUCKET CODE: 11 +INside get cached file +looking for prog in file 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_mp +found memory-cached prog GB_jit_AxB_dot3_phase3_mp + got kernel instance AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +Launching _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<1,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +warp 0 zombie count = 27, nzombies = 0 + Czombie = 27 +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 1.20934ms + + 32x32 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 32 entries, memory: 1.1 KB + pending tuples: 0 max pending: 0 zombies: 27 + + (0,6) zombie + (1,1) 1 + (3,12) zombie + (3,17) zombie + (4,19) zombie + (5,19) zombie + (6,22) zombie + (6,24) 0 + (8,10) zombie + (9,19) zombie + (9,31) 0 + (11,13) zombie + (12,11) zombie + (14,24) zombie + (15,30) zombie + (16,20) 0 + (17,30) zombie + (18,18) zombie + (19,1) zombie + (20,25) zombie + (21,24) zombie + (21,27) zombie + (22,30) zombie + (23,30) zombie + (24,14) zombie + (25,4) 1 + (26,15) zombie + (27,28) zombie + (28,16) zombie + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 32 entries, memory: 1.1 KB + + (0,6) 1 + (1,1) 1 + (3,12) 1 + (3,17) 0 + (4,19) 1 + (5,19) 1 + (6,22) 0 + (6,24) 1 + (8,10) 0 + (9,19) 0 + (9,31) 0 + (11,13) 0 + (12,11) 1 + (14,24) 1 + (15,30) 1 + (16,20) 0 + (17,30) 0 + (18,18) 1 + (19,1) 0 + (20,25) 0 + (21,24) 0 + (21,27) 0 + (22,30) 0 + (23,30) 1 + (24,14) 1 + (25,4) 1 + (26,15) 1 + (27,28) 1 + (28,16) 1 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 5 entries, memory: 864 bytes + + (1,1) 1 + (6,24) 0 + (9,31) 0 + (16,20) 0 + (25,4) 1 + + + 32x32 GraphBLAS int32_t matrix, sparse by row + C_actual, 5 entries, memory: 864 bytes + + (1,1) 1 + (6,24) 0 + (9,31) 0 + (16,20) 0 + (25,4) 1 + + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS double matrix, sparse by row + Diff actual, 5 entries, memory: 896 bytes + + (1,1) 0 + (6,24) 0 + (9,31) 0 + (16,20) 0 + (25,4) 0 + + + 32x32 GraphBLAS bool matrix, sparse by row + T actual, 5 entries, memory: 840 bytes + + (1,1) 1 + (6,24) 1 + (9,31) 1 + (16,20) 1 + (25,4) 1 + work:5 gpus:0 [ OK ] AxB_dot3_tests_PLUS_TIMES_3.tinyxtinyPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t (20 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_3.smallxsmallPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t +Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. 
+inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 4096 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +1024 slots to fill +all pairs to bucket 1, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff53ee200 +inside enumify: 0x7f1ff53ee200 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add 
(built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 1, no filling +done assigning buckets +bucket 1 has 1024 dots to do +LAUNCHING BUCKET CODE: 1 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_dndn +found memory-cached prog GB_jit_AxB_dot3_phase3_dndn + got kernel 
instance AxB_dot3_phase3_dndn_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_dndn_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_dndnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<32,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +tid=0, i,j = 569,22 nnzA= 1024, nnzB=1024 +tid=0, i,j = 192,13 nnzA= 1024, nnzB=1024 +tid=0, i,j = 103,18 nnzA= 1024, nnzB=1024 +tid=0, i,j = 886,19 nnzA= 1024, nnzB=1024 +tid=0, i,j = 905,24 nnzA= 1024, nnzB=1024 +tid=0, i,j = 975,21 nnzA= 1024, nnzB=1024 +tid=0, i,j = 996,6 nnzA= 1024, nnzB=1024 +tid=0, i,j = 107,28 nnzA= 1024, nnzB=1024 +tid=0, i,j = 568,15 nnzA= 1024, nnzB=1024 +tid=0, i,j = 187,9 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,0 nnzA= 1024, nnzB=1024 +tid=0, i,j = 928,17 nnzA= 1024, nnzB=1024 +tid=0, i,j = 428,26 nnzA= 1024, nnzB=1024 +tid=0, i,j = 310,23 nnzA= 1024, nnzB=1024 +tid=0, i,j = 376,2 nnzA= 1024, nnzB=1024 +tid=0, i,j = 560,5 nnzA= 1024, nnzB=1024 +tid=0, i,j = 896,8 nnzA= 1024, nnzB=1024 +tid=0, i,j = 446,10 nnzA= 1024, nnzB=1024 +tid=0, i,j = 953,12 nnzA= 1024, nnzB=1024 +tid=0, i,j = 821,19 nnzA= 1024, nnzB=1024 +tid=0, i,j = 46,11 nnzA= 1024, nnzB=1024 +tid=0, i,j = 421,14 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,21 nnzA= 1024, nnzB=1024 +tid=0, i,j = 397,12 nnzA= 1024, nnzB=1024 +tid=0, i,j = 904,16 nnzA= 1024, nnzB=1024 +tid=0, i,j = 241,25 nnzA= 1024, nnzB=1024 +tid=0, i,j = 474,20 nnzA= 1024, nnzB=1024 +tid=0, i,j = 788,16 nnzA= 1024, nnzB=1024 +tid=0, i,j = 478,0 nnzA= 1024, nnzB=1024 +tid=0, i,j = 666,7 nnzA= 1024, nnzB=1024 +tid=0, i,j = 955,11 nnzA= 1024, nnzB=1024 +tid=0, i,j = 183,7 nnzA= 1024, nnzB=1024 +tid=0, i,j = 960,54 nnzA= 1024, nnzB=1024 +tid=0, i,j = 476,49 nnzA= 1024, nnzB=1024 +tid=0, i,j = 893,57 nnzA= 1024, nnzB=1024 +tid=0, i,j = 108,59 nnzA= 1024, nnzB=1024 +tid=0, i,j = 940,53 nnzA= 1024, nnzB=1024 +tid=0, i,j = 804,34 nnzA= 1024, nnzB=1024 +tid=0, i,j = 70,47 nnzA= 1024, nnzB=1024 +tid=0, i,j = 846,57 
nnzA= 1024, nnzB=1024 +tid=0, i,j = 771,62 nnzA= 1024, nnzB=1024 +tid=0, i,j = 35,58 nnzA= 1024, nnzB=1024 +tid=0, i,j = 694,30 nnzA= 1024, nnzB=1024 +tid=0, i,j = 999,52 nnzA= 1024, nnzB=1024 +tid=0, i,j = 590,61 nnzA= 1024, nnzB=1024 +tid=0, i,j = 558,54 nnzA= 1024, nnzB=1024 +tid=0, i,j = 121,32 nnzA= 1024, nnzB=1024 +tid=0, i,j = 138,38 nnzA= 1024, nnzB=1024 +tid=0, i,j = 568,41 nnzA= 1024, nnzB=1024 +tid=0, i,j = 81,33 nnzA= 1024, nnzB=1024 +tid=0, i,j = 698,39 nnzA= 1024, nnzB=1024 +tid=0, i,j = 950,40 nnzA= 1024, nnzB=1024 +tid=0, i,j = 208,46 nnzA= 1024, nnzB=1024 +tid=0, i,j = 336,48 nnzA= 1024, nnzB=1024 +tid=0, i,j = 90,56 nnzA= 1024, nnzB=1024 +tid=0, i,j = 798,43 nnzA= 1024, nnzB=1024 +tid=0, i,j = 556,51 nnzA= 1024, nnzB=1024 +tid=0, i,j = 609,37 nnzA= 1024, nnzB=1024 +tid=0, i,j = 441,28 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,60 nnzA= 1024, nnzB=1024 +tid=0, i,j = 979,55 nnzA= 1024, nnzB=1024 +tid=0, i,j = 35,50 nnzA= 1024, nnzB=1024 +tid=0, i,j = 324,42 nnzA= 1024, nnzB=1024 +tid=0, i,j = 451,36 nnzA= 1024, nnzB=1024 +tid=0, i,j = 665,73 nnzA= 1024, nnzB=1024 +tid=0, i,j = 694,65 nnzA= 1024, nnzB=1024 +tid=0, i,j = 297,72 nnzA= 1024, nnzB=1024 +tid=0, i,j = 50,63 nnzA= 1024, nnzB=1024 +tid=0, i,j = 324,68 nnzA= 1024, nnzB=1024 +tid=0, i,j = 234,91 nnzA= 1024, nnzB=1024 +tid=0, i,j = 637,94 nnzA= 1024, nnzB=1024 +tid=0, i,j = 690,92 nnzA= 1024, nnzB=1024 +tid=0, i,j = 243,82 nnzA= 1024, nnzB=1024 +tid=0, i,j = 90,78 nnzA= 1024, nnzB=1024 +tid=0, i,j = 411,67 nnzA= 1024, nnzB=1024 +tid=0, i,j = 825,89 nnzA= 1024, nnzB=1024 +tid=0, i,j = 218,92 nnzA= 1024, nnzB=1024 +tid=0, i,j = 748,97 nnzA= 1024, nnzB=1024 +tid=0, i,j = 486,93 nnzA= 1024, nnzB=1024 +tid=0, i,j = 268,64 nnzA= 1024, nnzB=1024 +tid=0, i,j = 657,87 nnzA= 1024, nnzB=1024 +tid=0, i,j = 37,90 nnzA= 1024, nnzB=1024 +tid=0, i,j = 719,66 nnzA= 1024, nnzB=1024 +tid=0, i,j = 564,96 nnzA= 1024, nnzB=1024 +tid=0, i,j = 539,70 nnzA= 1024, nnzB=1024 +tid=0, i,j = 754,81 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 228,71 nnzA= 1024, nnzB=1024 +tid=0, i,j = 74,92 nnzA= 1024, nnzB=1024 +tid=0, i,j = 433,77 nnzA= 1024, nnzB=1024 +tid=0, i,j = 248,76 nnzA= 1024, nnzB=1024 +tid=0, i,j = 104,86 nnzA= 1024, nnzB=1024 +tid=0, i,j = 722,94 nnzA= 1024, nnzB=1024 +tid=0, i,j = 519,91 nnzA= 1024, nnzB=1024 +tid=0, i,j = 402,110 nnzA= 1024, nnzB=1024 +tid=0, i,j = 609,102 nnzA= 1024, nnzB=1024 +tid=0, i,j = 253,84 nnzA= 1024, nnzB=1024 +tid=0, i,j = 884,109 nnzA= 1024, nnzB=1024 +tid=0, i,j = 477,69 nnzA= 1024, nnzB=1024 +tid=0, i,j = 326,99 nnzA= 1024, nnzB=1024 +tid=0, i,j = 855,75 nnzA= 1024, nnzB=1024 +tid=0, i,j = 652,106 nnzA= 1024, nnzB=1024 +tid=0, i,j = 820,128 nnzA= 1024, nnzB=1024 +tid=0, i,j = 240,115 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,125 nnzA= 1024, nnzB=1024 +tid=0, i,j = 554,121 nnzA= 1024, nnzB=1024 +tid=0, i,j = 278,113 nnzA= 1024, nnzB=1024 +tid=0, i,j = 99,118 nnzA= 1024, nnzB=1024 +tid=0, i,j = 621,123 nnzA= 1024, nnzB=1024 +tid=0, i,j = 623,130 nnzA= 1024, nnzB=1024 +tid=0, i,j = 644,104 nnzA= 1024, nnzB=1024 +tid=0, i,j = 131,108 nnzA= 1024, nnzB=1024 +tid=0, i,j = 338,117 nnzA= 1024, nnzB=1024 +tid=0, i,j = 281,100 nnzA= 1024, nnzB=1024 +tid=0, i,j = 36,126 nnzA= 1024, nnzB=1024 +tid=0, i,j = 660,129 nnzA= 1024, nnzB=1024 +tid=0, i,j = 522,107 nnzA= 1024, nnzB=1024 +tid=0, i,j = 779,112 nnzA= 1024, nnzB=1024 +tid=0, i,j = 151,122 nnzA= 1024, nnzB=1024 +tid=0, i,j = 477,120 nnzA= 1024, nnzB=1024 +tid=0, i,j = 127,112 nnzA= 1024, nnzB=1024 +tid=0, i,j = 621,103 nnzA= 1024, nnzB=1024 +tid=0, i,j = 519,114 nnzA= 1024, nnzB=1024 +tid=0, i,j = 219,117 nnzA= 1024, nnzB=1024 +tid=0, i,j = 288,138 nnzA= 1024, nnzB=1024 +tid=0, i,j = 284,139 nnzA= 1024, nnzB=1024 +tid=0, i,j = 355,131 nnzA= 1024, nnzB=1024 +tid=0, i,j = 835,128 nnzA= 1024, nnzB=1024 +tid=0, i,j = 715,121 nnzA= 1024, nnzB=1024 +tid=0, i,j = 198,116 nnzA= 1024, nnzB=1024 +tid=0, i,j = 821,134 nnzA= 1024, nnzB=1024 +tid=0, i,j = 905,111 nnzA= 1024, nnzB=1024 +tid=0, i,j = 239,107 nnzA= 1024, 
nnzB=1024 +tid=0, i,j = 807,130 nnzA= 1024, nnzB=1024 +tid=0, i,j = 959,158 nnzA= 1024, nnzB=1024 +tid=0, i,j = 714,157 nnzA= 1024, nnzB=1024 +tid=0, i,j = 123,143 nnzA= 1024, nnzB=1024 +tid=0, i,j = 87,142 nnzA= 1024, nnzB=1024 +tid=0, i,j = 290,156 nnzA= 1024, nnzB=1024 +tid=0, i,j = 995,146 nnzA= 1024, nnzB=1024 +tid=0, i,j = 816,163 nnzA= 1024, nnzB=1024 +tid=0, i,j = 949,149 nnzA= 1024, nnzB=1024 +tid=0, i,j = 492,133 nnzA= 1024, nnzB=1024 +tid=0, i,j = 974,157 nnzA= 1024, nnzB=1024 +tid=0, i,j = 834,137 nnzA= 1024, nnzB=1024 +tid=0, i,j = 253,131 nnzA= 1024, nnzB=1024 +tid=0, i,j = 194,146 nnzA= 1024, nnzB=1024 +tid=0, i,j = 601,162 nnzA= 1024, nnzB=1024 +tid=0, i,j = 108,136 nnzA= 1024, nnzB=1024 +tid=0, i,j = 199,141 nnzA= 1024, nnzB=1024 +tid=0, i,j = 357,148 nnzA= 1024, nnzB=1024 +tid=0, i,j = 484,151 nnzA= 1024, nnzB=1024 +tid=0, i,j = 570,132 nnzA= 1024, nnzB=1024 +tid=0, i,j = 225,142 nnzA= 1024, nnzB=1024 +tid=0, i,j = 552,145 nnzA= 1024, nnzB=1024 +tid=0, i,j = 0,174 nnzA= 1024, nnzB=1024 +tid=0, i,j = 887,140 nnzA= 1024, nnzB=1024 +tid=0, i,j = 595,174 nnzA= 1024, nnzB=1024 +tid=0, i,j = 801,166 nnzA= 1024, nnzB=1024 +tid=0, i,j = 297,160 nnzA= 1024, nnzB=1024 +tid=0, i,j = 717,150 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,144 nnzA= 1024, nnzB=1024 +tid=0, i,j = 295,135 nnzA= 1024, nnzB=1024 +tid=0, i,j = 945,139 nnzA= 1024, nnzB=1024 +tid=0, i,j = 901,169 nnzA= 1024, nnzB=1024 +tid=0, i,j = 221,164 nnzA= 1024, nnzB=1024 +tid=0, i,j = 111,189 nnzA= 1024, nnzB=1024 +tid=0, i,j = 997,187 nnzA= 1024, nnzB=1024 +tid=0, i,j = 17,177 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,176 nnzA= 1024, nnzB=1024 +tid=0, i,j = 779,186 nnzA= 1024, nnzB=1024 +tid=0, i,j = 357,188 nnzA= 1024, nnzB=1024 +tid=0, i,j = 675,182 nnzA= 1024, nnzB=1024 +tid=0, i,j = 692,183 nnzA= 1024, nnzB=1024 +tid=0, i,j = 321,168 nnzA= 1024, nnzB=1024 +tid=0, i,j = 953,192 nnzA= 1024, nnzB=1024 +tid=0, i,j = 951,172 nnzA= 1024, nnzB=1024 +tid=0, i,j = 396,165 nnzA= 1024, nnzB=1024 +tid=0, i,j = 
644,192 nnzA= 1024, nnzB=1024 +tid=0, i,j = 830,180 nnzA= 1024, nnzB=1024 +tid=0, i,j = 15,171 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1001,182 nnzA= 1024, nnzB=1024 +tid=0, i,j = 188,176 nnzA= 1024, nnzB=1024 +tid=0, i,j = 450,185 nnzA= 1024, nnzB=1024 +tid=0, i,j = 781,176 nnzA= 1024, nnzB=1024 +tid=0, i,j = 879,167 nnzA= 1024, nnzB=1024 +tid=0, i,j = 922,195 nnzA= 1024, nnzB=1024 +tid=0, i,j = 932,179 nnzA= 1024, nnzB=1024 +tid=0, i,j = 826,207 nnzA= 1024, nnzB=1024 +tid=0, i,j = 108,176 nnzA= 1024, nnzB=1024 +tid=0, i,j = 990,190 nnzA= 1024, nnzB=1024 +tid=0, i,j = 143,184 nnzA= 1024, nnzB=1024 +tid=0, i,j = 612,170 nnzA= 1024, nnzB=1024 +tid=0, i,j = 631,178 nnzA= 1024, nnzB=1024 +tid=0, i,j = 669,175 nnzA= 1024, nnzB=1024 +tid=0, i,j = 531,199 nnzA= 1024, nnzB=1024 +tid=0, i,j = 135,193 nnzA= 1024, nnzB=1024 +tid=0, i,j = 545,222 nnzA= 1024, nnzB=1024 +tid=0, i,j = 56,221 nnzA= 1024, nnzB=1024 +tid=0, i,j = 71,211 nnzA= 1024, nnzB=1024 +tid=0, i,j = 704,220 nnzA= 1024, nnzB=1024 +tid=0, i,j = 551,221 nnzA= 1024, nnzB=1024 +tid=0, i,j = 704,218 nnzA= 1024, nnzB=1024 +tid=0, i,j = 550,216 nnzA= 1024, nnzB=1024 +tid=0, i,j = 910,198 nnzA= 1024, nnzB=1024 +tid=0, i,j = 935,225 nnzA= 1024, nnzB=1024 +tid=0, i,j = 865,203 nnzA= 1024, nnzB=1024 +tid=0, i,j = 137,194 nnzA= 1024, nnzB=1024 +tid=0, i,j = 721,224 nnzA= 1024, nnzB=1024 +tid=0, i,j = 921,217 nnzA= 1024, nnzB=1024 +tid=0, i,j = 268,215 nnzA= 1024, nnzB=1024 +tid=0, i,j = 659,210 nnzA= 1024, nnzB=1024 +tid=0, i,j = 66,219 nnzA= 1024, nnzB=1024 +tid=0, i,j = 931,211 nnzA= 1024, nnzB=1024 +tid=0, i,j = 859,197 nnzA= 1024, nnzB=1024 +tid=0, i,j = 535,229 nnzA= 1024, nnzB=1024 +tid=0, i,j = 808,209 nnzA= 1024, nnzB=1024 +tid=0, i,j = 755,237 nnzA= 1024, nnzB=1024 +tid=0, i,j = 928,214 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1016,223 nnzA= 1024, nnzB=1024 +tid=0, i,j = 922,218 nnzA= 1024, nnzB=1024 +tid=0, i,j = 152,213 nnzA= 1024, nnzB=1024 +tid=0, i,j = 907,201 nnzA= 1024, nnzB=1024 +tid=0, i,j = 985,208 nnzA= 1024, 
nnzB=1024 +tid=0, i,j = 897,232 nnzA= 1024, nnzB=1024 +tid=0, i,j = 727,226 nnzA= 1024, nnzB=1024 +tid=0, i,j = 197,252 nnzA= 1024, nnzB=1024 +tid=0, i,j = 426,212 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,204 nnzA= 1024, nnzB=1024 +tid=0, i,j = 485,250 nnzA= 1024, nnzB=1024 +tid=0, i,j = 863,202 nnzA= 1024, nnzB=1024 +tid=0, i,j = 500,241 nnzA= 1024, nnzB=1024 +tid=0, i,j = 233,248 nnzA= 1024, nnzB=1024 +tid=0, i,j = 708,251 nnzA= 1024, nnzB=1024 +tid=0, i,j = 551,231 nnzA= 1024, nnzB=1024 +tid=0, i,j = 326,246 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1004,244 nnzA= 1024, nnzB=1024 +tid=0, i,j = 743,228 nnzA= 1024, nnzB=1024 +tid=0, i,j = 40,254 nnzA= 1024, nnzB=1024 +tid=0, i,j = 494,245 nnzA= 1024, nnzB=1024 +tid=0, i,j = 588,244 nnzA= 1024, nnzB=1024 +tid=0, i,j = 522,235 nnzA= 1024, nnzB=1024 +tid=0, i,j = 238,254 nnzA= 1024, nnzB=1024 +tid=0, i,j = 388,240 nnzA= 1024, nnzB=1024 +tid=0, i,j = 382,230 nnzA= 1024, nnzB=1024 +tid=0, i,j = 147,247 nnzA= 1024, nnzB=1024 +tid=0, i,j = 82,239 nnzA= 1024, nnzB=1024 +tid=0, i,j = 485,253 nnzA= 1024, nnzB=1024 +tid=0, i,j = 300,243 nnzA= 1024, nnzB=1024 +tid=0, i,j = 124,242 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,247 nnzA= 1024, nnzB=1024 +tid=0, i,j = 621,242 nnzA= 1024, nnzB=1024 +tid=0, i,j = 570,233 nnzA= 1024, nnzB=1024 +tid=0, i,j = 964,238 nnzA= 1024, nnzB=1024 +tid=0, i,j = 352,266 nnzA= 1024, nnzB=1024 +tid=0, i,j = 461,257 nnzA= 1024, nnzB=1024 +tid=0, i,j = 895,255 nnzA= 1024, nnzB=1024 +tid=0, i,j = 884,260 nnzA= 1024, nnzB=1024 +tid=0, i,j = 32,285 nnzA= 1024, nnzB=1024 +tid=0, i,j = 193,242 nnzA= 1024, nnzB=1024 +tid=0, i,j = 221,236 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1007,283 nnzA= 1024, nnzB=1024 +tid=0, i,j = 520,234 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,270 nnzA= 1024, nnzB=1024 +tid=0, i,j = 179,282 nnzA= 1024, nnzB=1024 +tid=0, i,j = 595,284 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,279 nnzA= 1024, nnzB=1024 +tid=0, i,j = 233,258 nnzA= 1024, nnzB=1024 +tid=0, i,j = 333,276 nnzA= 1024, nnzB=1024 +tid=0, 
i,j = 114,256 nnzA= 1024, nnzB=1024 +tid=0, i,j = 377,277 nnzA= 1024, nnzB=1024 +tid=0, i,j = 126,287 nnzA= 1024, nnzB=1024 +tid=0, i,j = 979,274 nnzA= 1024, nnzB=1024 +tid=0, i,j = 755,264 nnzA= 1024, nnzB=1024 +tid=0, i,j = 394,287 nnzA= 1024, nnzB=1024 +tid=0, i,j = 449,281 nnzA= 1024, nnzB=1024 +tid=0, i,j = 37,286 nnzA= 1024, nnzB=1024 +tid=0, i,j = 234,268 nnzA= 1024, nnzB=1024 +tid=0, i,j = 400,269 nnzA= 1024, nnzB=1024 +tid=0, i,j = 796,257 nnzA= 1024, nnzB=1024 +tid=0, i,j = 924,270 nnzA= 1024, nnzB=1024 +tid=0, i,j = 100,273 nnzA= 1024, nnzB=1024 +tid=0, i,j = 67,272 nnzA= 1024, nnzB=1024 +tid=0, i,j = 945,261 nnzA= 1024, nnzB=1024 +tid=0, i,j = 18,280 nnzA= 1024, nnzB=1024 +tid=0, i,j = 10,267 nnzA= 1024, nnzB=1024 +tid=0, i,j = 245,296 nnzA= 1024, nnzB=1024 +tid=0, i,j = 594,291 nnzA= 1024, nnzB=1024 +tid=0, i,j = 848,288 nnzA= 1024, nnzB=1024 +tid=0, i,j = 960,294 nnzA= 1024, nnzB=1024 +tid=0, i,j = 571,314 nnzA= 1024, nnzB=1024 +tid=0, i,j = 124,265 nnzA= 1024, nnzB=1024 +tid=0, i,j = 944,271 nnzA= 1024, nnzB=1024 +tid=0, i,j = 61,312 nnzA= 1024, nnzB=1024 +tid=0, i,j = 368,262 nnzA= 1024, nnzB=1024 +tid=0, i,j = 758,313 nnzA= 1024, nnzB=1024 +tid=0, i,j = 554,300 nnzA= 1024, nnzB=1024 +tid=0, i,j = 328,309 nnzA= 1024, nnzB=1024 +tid=0, i,j = 466,294 nnzA= 1024, nnzB=1024 +tid=0, i,j = 264,307 nnzA= 1024, nnzB=1024 +tid=0, i,j = 355,311 nnzA= 1024, nnzB=1024 +tid=0, i,j = 298,315 nnzA= 1024, nnzB=1024 +tid=0, i,j = 28,308 nnzA= 1024, nnzB=1024 +tid=0, i,j = 622,305 nnzA= 1024, nnzB=1024 +tid=0, i,j = 317,290 nnzA= 1024, nnzB=1024 +tid=0, i,j = 109,296 nnzA= 1024, nnzB=1024 +tid=0, i,j = 562,292 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1006,297 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,315 nnzA= 1024, nnzB=1024 +tid=0, i,j = 741,315 nnzA= 1024, nnzB=1024 +tid=0, i,j = 159,299 nnzA= 1024, nnzB=1024 +tid=0, i,j = 357,310 nnzA= 1024, nnzB=1024 +tid=0, i,j = 229,304 nnzA= 1024, nnzB=1024 +tid=0, i,j = 645,303 nnzA= 1024, nnzB=1024 +tid=0, i,j = 774,301 nnzA= 
1024, nnzB=1024 +tid=0, i,j = 1,295 nnzA= 1024, nnzB=1024 +tid=0, i,j = 627,309 nnzA= 1024, nnzB=1024 +tid=0, i,j = 912,297 nnzA= 1024, nnzB=1024 +tid=0, i,j = 30,302 nnzA= 1024, nnzB=1024 +tid=0, i,j = 183,296 nnzA= 1024, nnzB=1024 +tid=0, i,j = 106,295 nnzA= 1024, nnzB=1024 +tid=0, i,j = 126,319 nnzA= 1024, nnzB=1024 +tid=0, i,j = 549,324 nnzA= 1024, nnzB=1024 +tid=0, i,j = 595,318 nnzA= 1024, nnzB=1024 +tid=0, i,j = 235,322 nnzA= 1024, nnzB=1024 +tid=0, i,j = 375,323 nnzA= 1024, nnzB=1024 +tid=0, i,j = 308,316 nnzA= 1024, nnzB=1024 +tid=0, i,j = 651,323 nnzA= 1024, nnzB=1024 +tid=0, i,j = 487,325 nnzA= 1024, nnzB=1024 +tid=0, i,j = 323,317 nnzA= 1024, nnzB=1024 +tid=0, i,j = 73,321 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,316 nnzA= 1024, nnzB=1024 +tid=0, i,j = 468,320 nnzA= 1024, nnzB=1024 +tid=0, i,j = 306,325 nnzA= 1024, nnzB=1024 +tid=0, i,j = 993,338 nnzA= 1024, nnzB=1024 +tid=0, i,j = 384,344 nnzA= 1024, nnzB=1024 +tid=0, i,j = 849,340 nnzA= 1024, nnzB=1024 +tid=0, i,j = 680,339 nnzA= 1024, nnzB=1024 +tid=0, i,j = 704,327 nnzA= 1024, nnzB=1024 +tid=0, i,j = 176,329 nnzA= 1024, nnzB=1024 +tid=0, i,j = 608,337 nnzA= 1024, nnzB=1024 +tid=0, i,j = 795,332 nnzA= 1024, nnzB=1024 +tid=0, i,j = 695,334 nnzA= 1024, nnzB=1024 +tid=0, i,j = 649,326 nnzA= 1024, nnzB=1024 +tid=0, i,j = 694,335 nnzA= 1024, nnzB=1024 +tid=0, i,j = 142,328 nnzA= 1024, nnzB=1024 +tid=0, i,j = 723,342 nnzA= 1024, nnzB=1024 +tid=0, i,j = 848,330 nnzA= 1024, nnzB=1024 +tid=0, i,j = 808,336 nnzA= 1024, nnzB=1024 +tid=0, i,j = 965,330 nnzA= 1024, nnzB=1024 +tid=0, i,j = 775,336 nnzA= 1024, nnzB=1024 +tid=0, i,j = 36,341 nnzA= 1024, nnzB=1024 +tid=0, i,j = 678,343 nnzA= 1024, nnzB=1024 +tid=0, i,j = 821,348 nnzA= 1024, nnzB=1024 +tid=0, i,j = 60,348 nnzA= 1024, nnzB=1024 +tid=0, i,j = 536,353 nnzA= 1024, nnzB=1024 +tid=0, i,j = 937,351 nnzA= 1024, nnzB=1024 +tid=0, i,j = 160,352 nnzA= 1024, nnzB=1024 +tid=0, i,j = 340,356 nnzA= 1024, nnzB=1024 +tid=0, i,j = 142,351 nnzA= 1024, nnzB=1024 +tid=0, 
i,j = 996,347 nnzA= 1024, nnzB=1024 +tid=0, i,j = 282,350 nnzA= 1024, nnzB=1024 +tid=0, i,j = 804,349 nnzA= 1024, nnzB=1024 +tid=0, i,j = 680,344 nnzA= 1024, nnzB=1024 +tid=0, i,j = 75,345 nnzA= 1024, nnzB=1024 +tid=0, i,j = 352,355 nnzA= 1024, nnzB=1024 +tid=0, i,j = 679,360 nnzA= 1024, nnzB=1024 +tid=0, i,j = 989,361 nnzA= 1024, nnzB=1024 +tid=0, i,j = 678,358 nnzA= 1024, nnzB=1024 +tid=0, i,j = 794,361 nnzA= 1024, nnzB=1024 +tid=0, i,j = 669,374 nnzA= 1024, nnzB=1024 +tid=0, i,j = 270,370 nnzA= 1024, nnzB=1024 +tid=0, i,j = 971,366 nnzA= 1024, nnzB=1024 +tid=0, i,j = 327,371 nnzA= 1024, nnzB=1024 +tid=0, i,j = 471,372 nnzA= 1024, nnzB=1024 +tid=0, i,j = 990,365 nnzA= 1024, nnzB=1024 +tid=0, i,j = 629,364 nnzA= 1024, nnzB=1024 +tid=0, i,j = 841,366 nnzA= 1024, nnzB=1024 +tid=0, i,j = 88,373 nnzA= 1024, nnzB=1024 +tid=0, i,j = 587,368 nnzA= 1024, nnzB=1024 +tid=0, i,j = 816,362 nnzA= 1024, nnzB=1024 +tid=0, i,j = 888,367 nnzA= 1024, nnzB=1024 +tid=0, i,j = 684,369 nnzA= 1024, nnzB=1024 +tid=0, i,j = 992,375 nnzA= 1024, nnzB=1024 +tid=0, i,j = 206,363 nnzA= 1024, nnzB=1024 +tid=0, i,j = 77,380 nnzA= 1024, nnzB=1024 +tid=0, i,j = 592,379 nnzA= 1024, nnzB=1024 +tid=0, i,j = 537,386 nnzA= 1024, nnzB=1024 +tid=0, i,j = 816,382 nnzA= 1024, nnzB=1024 +tid=0, i,j = 494,388 nnzA= 1024, nnzB=1024 +tid=0, i,j = 711,383 nnzA= 1024, nnzB=1024 +tid=0, i,j = 670,385 nnzA= 1024, nnzB=1024 +tid=0, i,j = 86,377 nnzA= 1024, nnzB=1024 +tid=0, i,j = 882,378 nnzA= 1024, nnzB=1024 +tid=0, i,j = 643,380 nnzA= 1024, nnzB=1024 +tid=0, i,j = 336,376 nnzA= 1024, nnzB=1024 +tid=0, i,j = 733,389 nnzA= 1024, nnzB=1024 +tid=0, i,j = 347,387 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1012,381 nnzA= 1024, nnzB=1024 +tid=0, i,j = 328,389 nnzA= 1024, nnzB=1024 +tid=0, i,j = 551,390 nnzA= 1024, nnzB=1024 +tid=0, i,j = 59,391 nnzA= 1024, nnzB=1024 +tid=0, i,j = 246,398 nnzA= 1024, nnzB=1024 +tid=0, i,j = 382,400 nnzA= 1024, nnzB=1024 +tid=0, i,j = 790,401 nnzA= 1024, nnzB=1024 +tid=0, i,j = 49,405 nnzA= 
1024, nnzB=1024 +tid=0, i,j = 748,420 nnzA= 1024, nnzB=1024 +tid=0, i,j = 26,415 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,422 nnzA= 1024, nnzB=1024 +tid=0, i,j = 645,396 nnzA= 1024, nnzB=1024 +tid=0, i,j = 692,394 nnzA= 1024, nnzB=1024 +tid=0, i,j = 835,397 nnzA= 1024, nnzB=1024 +tid=0, i,j = 643,411 nnzA= 1024, nnzB=1024 +tid=0, i,j = 366,417 nnzA= 1024, nnzB=1024 +tid=0, i,j = 285,419 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,421 nnzA= 1024, nnzB=1024 +tid=0, i,j = 209,419 nnzA= 1024, nnzB=1024 +tid=0, i,j = 160,418 nnzA= 1024, nnzB=1024 +tid=0, i,j = 172,400 nnzA= 1024, nnzB=1024 +tid=0, i,j = 600,391 nnzA= 1024, nnzB=1024 +tid=0, i,j = 575,416 nnzA= 1024, nnzB=1024 +tid=0, i,j = 669,418 nnzA= 1024, nnzB=1024 +tid=0, i,j = 641,404 nnzA= 1024, nnzB=1024 +tid=0, i,j = 320,407 nnzA= 1024, nnzB=1024 +tid=0, i,j = 94,413 nnzA= 1024, nnzB=1024 +tid=0, i,j = 949,412 nnzA= 1024, nnzB=1024 +tid=0, i,j = 475,405 nnzA= 1024, nnzB=1024 +tid=0, i,j = 320,402 nnzA= 1024, nnzB=1024 +tid=0, i,j = 40,403 nnzA= 1024, nnzB=1024 +tid=0, i,j = 61,408 nnzA= 1024, nnzB=1024 +tid=0, i,j = 107,398 nnzA= 1024, nnzB=1024 +tid=0, i,j = 754,410 nnzA= 1024, nnzB=1024 +tid=0, i,j = 436,399 nnzA= 1024, nnzB=1024 +tid=0, i,j = 991,414 nnzA= 1024, nnzB=1024 +tid=0, i,j = 51,431 nnzA= 1024, nnzB=1024 +tid=0, i,j = 868,452 nnzA= 1024, nnzB=1024 +tid=0, i,j = 377,426 nnzA= 1024, nnzB=1024 +tid=0, i,j = 415,454 nnzA= 1024, nnzB=1024 +tid=0, i,j = 263,425 nnzA= 1024, nnzB=1024 +tid=0, i,j = 857,432 nnzA= 1024, nnzB=1024 +tid=0, i,j = 753,443 nnzA= 1024, nnzB=1024 +tid=0, i,j = 970,447 nnzA= 1024, nnzB=1024 +tid=0, i,j = 542,424 nnzA= 1024, nnzB=1024 +tid=0, i,j = 950,428 nnzA= 1024, nnzB=1024 +tid=0, i,j = 105,437 nnzA= 1024, nnzB=1024 +tid=0, i,j = 26,453 nnzA= 1024, nnzB=1024 +tid=0, i,j = 669,444 nnzA= 1024, nnzB=1024 +tid=0, i,j = 835,449 nnzA= 1024, nnzB=1024 +tid=0, i,j = 356,436 nnzA= 1024, nnzB=1024 +tid=0, i,j = 152,435 nnzA= 1024, nnzB=1024 +tid=0, i,j = 147,451 nnzA= 1024, nnzB=1024 +tid=0, 
i,j = 873,423 nnzA= 1024, nnzB=1024 +tid=0, i,j = 604,434 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1017,451 nnzA= 1024, nnzB=1024 +tid=0, i,j = 744,448 nnzA= 1024, nnzB=1024 +tid=0, i,j = 338,440 nnzA= 1024, nnzB=1024 +tid=0, i,j = 982,441 nnzA= 1024, nnzB=1024 +tid=0, i,j = 579,450 nnzA= 1024, nnzB=1024 +tid=0, i,j = 880,442 nnzA= 1024, nnzB=1024 +tid=0, i,j = 718,430 nnzA= 1024, nnzB=1024 +tid=0, i,j = 814,438 nnzA= 1024, nnzB=1024 +tid=0, i,j = 646,448 nnzA= 1024, nnzB=1024 +tid=0, i,j = 149,427 nnzA= 1024, nnzB=1024 +tid=0, i,j = 952,445 nnzA= 1024, nnzB=1024 +tid=0, i,j = 305,429 nnzA= 1024, nnzB=1024 +tid=0, i,j = 741,446 nnzA= 1024, nnzB=1024 +tid=0, i,j = 849,456 nnzA= 1024, nnzB=1024 +tid=0, i,j = 43,455 nnzA= 1024, nnzB=1024 +tid=0, i,j = 668,454 nnzA= 1024, nnzB=1024 +tid=0, i,j = 917,461 nnzA= 1024, nnzB=1024 +tid=0, i,j = 510,458 nnzA= 1024, nnzB=1024 +tid=0, i,j = 316,463 nnzA= 1024, nnzB=1024 +tid=0, i,j = 985,456 nnzA= 1024, nnzB=1024 +tid=0, i,j = 650,471 nnzA= 1024, nnzB=1024 +tid=0, i,j = 382,475 nnzA= 1024, nnzB=1024 +tid=0, i,j = 900,462 nnzA= 1024, nnzB=1024 +tid=0, i,j = 889,470 nnzA= 1024, nnzB=1024 +tid=0, i,j = 218,457 nnzA= 1024, nnzB=1024 +tid=0, i,j = 194,487 nnzA= 1024, nnzB=1024 +tid=0, i,j = 44,476 nnzA= 1024, nnzB=1024 +tid=0, i,j = 342,477 nnzA= 1024, nnzB=1024 +tid=0, i,j = 517,483 nnzA= 1024, nnzB=1024 +tid=0, i,j = 836,460 nnzA= 1024, nnzB=1024 +tid=0, i,j = 667,480 nnzA= 1024, nnzB=1024 +tid=0, i,j = 762,464 nnzA= 1024, nnzB=1024 +tid=0, i,j = 127,473 nnzA= 1024, nnzB=1024 +tid=0, i,j = 673,466 nnzA= 1024, nnzB=1024 +tid=0, i,j = 355,465 nnzA= 1024, nnzB=1024 +tid=0, i,j = 849,461 nnzA= 1024, nnzB=1024 +tid=0, i,j = 112,467 nnzA= 1024, nnzB=1024 +tid=0, i,j = 801,465 nnzA= 1024, nnzB=1024 +tid=0, i,j = 680,482 nnzA= 1024, nnzB=1024 +tid=0, i,j = 487,474 nnzA= 1024, nnzB=1024 +tid=0, i,j = 558,481 nnzA= 1024, nnzB=1024 +tid=0, i,j = 961,484 nnzA= 1024, nnzB=1024 +tid=0, i,j = 288,468 nnzA= 1024, nnzB=1024 +tid=0, i,j = 737,459 nnzA= 
1024, nnzB=1024 +tid=0, i,j = 121,473 nnzA= 1024, nnzB=1024 +tid=0, i,j = 786,478 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1015,486 nnzA= 1024, nnzB=1024 +tid=0, i,j = 274,485 nnzA= 1024, nnzB=1024 +tid=0, i,j = 802,489 nnzA= 1024, nnzB=1024 +tid=0, i,j = 267,496 nnzA= 1024, nnzB=1024 +tid=0, i,j = 643,507 nnzA= 1024, nnzB=1024 +tid=0, i,j = 404,514 nnzA= 1024, nnzB=1024 +tid=0, i,j = 194,498 nnzA= 1024, nnzB=1024 +tid=0, i,j = 811,490 nnzA= 1024, nnzB=1024 +tid=0, i,j = 319,491 nnzA= 1024, nnzB=1024 +tid=0, i,j = 362,525 nnzA= 1024, nnzB=1024 +tid=0, i,j = 902,496 nnzA= 1024, nnzB=1024 +tid=0, i,j = 369,517 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,505 nnzA= 1024, nnzB=1024 +tid=0, i,j = 758,515 nnzA= 1024, nnzB=1024 +tid=0, i,j = 284,522 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1013,521 nnzA= 1024, nnzB=1024 +tid=0, i,j = 786,519 nnzA= 1024, nnzB=1024 +tid=0, i,j = 952,499 nnzA= 1024, nnzB=1024 +tid=0, i,j = 809,495 nnzA= 1024, nnzB=1024 +tid=0, i,j = 704,501 nnzA= 1024, nnzB=1024 +tid=0, i,j = 84,500 nnzA= 1024, nnzB=1024 +tid=0, i,j = 271,513 nnzA= 1024, nnzB=1024 +tid=0, i,j = 432,494 nnzA= 1024, nnzB=1024 +tid=0, i,j = 892,512 nnzA= 1024, nnzB=1024 +tid=0, i,j = 519,503 nnzA= 1024, nnzB=1024 +tid=0, i,j = 270,520 nnzA= 1024, nnzB=1024 +tid=0, i,j = 887,504 nnzA= 1024, nnzB=1024 +tid=0, i,j = 510,504 nnzA= 1024, nnzB=1024 +tid=0, i,j = 377,492 nnzA= 1024, nnzB=1024 +tid=0, i,j = 945,524 nnzA= 1024, nnzB=1024 +tid=0, i,j = 632,523 nnzA= 1024, nnzB=1024 +tid=0, i,j = 449,508 nnzA= 1024, nnzB=1024 +tid=0, i,j = 94,525 nnzA= 1024, nnzB=1024 +tid=0, i,j = 293,518 nnzA= 1024, nnzB=1024 +tid=0, i,j = 52,526 nnzA= 1024, nnzB=1024 +tid=0, i,j = 908,531 nnzA= 1024, nnzB=1024 +tid=0, i,j = 137,542 nnzA= 1024, nnzB=1024 +tid=0, i,j = 505,534 nnzA= 1024, nnzB=1024 +tid=0, i,j = 61,527 nnzA= 1024, nnzB=1024 +tid=0, i,j = 294,528 nnzA= 1024, nnzB=1024 +tid=0, i,j = 912,547 nnzA= 1024, nnzB=1024 +tid=0, i,j = 674,533 nnzA= 1024, nnzB=1024 +tid=0, i,j = 245,540 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 1018,547 nnzA= 1024, nnzB=1024 +tid=0, i,j = 910,557 nnzA= 1024, nnzB=1024 +tid=0, i,j = 46,548 nnzA= 1024, nnzB=1024 +tid=0, i,j = 500,536 nnzA= 1024, nnzB=1024 +tid=0, i,j = 112,530 nnzA= 1024, nnzB=1024 +tid=0, i,j = 710,552 nnzA= 1024, nnzB=1024 +tid=0, i,j = 602,549 nnzA= 1024, nnzB=1024 +tid=0, i,j = 660,535 nnzA= 1024, nnzB=1024 +tid=0, i,j = 391,553 nnzA= 1024, nnzB=1024 +tid=0, i,j = 776,535 nnzA= 1024, nnzB=1024 +tid=0, i,j = 767,547 nnzA= 1024, nnzB=1024 +tid=0, i,j = 927,550 nnzA= 1024, nnzB=1024 +tid=0, i,j = 861,538 nnzA= 1024, nnzB=1024 +tid=0, i,j = 492,538 nnzA= 1024, nnzB=1024 +tid=0, i,j = 799,537 nnzA= 1024, nnzB=1024 +tid=0, i,j = 351,554 nnzA= 1024, nnzB=1024 +tid=0, i,j = 145,529 nnzA= 1024, nnzB=1024 +tid=0, i,j = 998,529 nnzA= 1024, nnzB=1024 +tid=0, i,j = 213,546 nnzA= 1024, nnzB=1024 +tid=0, i,j = 10,555 nnzA= 1024, nnzB=1024 +tid=0, i,j = 26,556 nnzA= 1024, nnzB=1024 +tid=0, i,j = 697,548 nnzA= 1024, nnzB=1024 +tid=0, i,j = 658,545 nnzA= 1024, nnzB=1024 +tid=0, i,j = 603,574 nnzA= 1024, nnzB=1024 +tid=0, i,j = 552,558 nnzA= 1024, nnzB=1024 +tid=0, i,j = 310,564 nnzA= 1024, nnzB=1024 +tid=0, i,j = 792,560 nnzA= 1024, nnzB=1024 +tid=0, i,j = 597,561 nnzA= 1024, nnzB=1024 +tid=0, i,j = 445,581 nnzA= 1024, nnzB=1024 +tid=0, i,j = 490,565 nnzA= 1024, nnzB=1024 +tid=0, i,j = 630,572 nnzA= 1024, nnzB=1024 +tid=0, i,j = 872,569 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,582 nnzA= 1024, nnzB=1024 +tid=0, i,j = 118,583 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,563 nnzA= 1024, nnzB=1024 +tid=0, i,j = 609,564 nnzA= 1024, nnzB=1024 +tid=0, i,j = 914,588 nnzA= 1024, nnzB=1024 +tid=0, i,j = 433,585 nnzA= 1024, nnzB=1024 +tid=0, i,j = 564,566 nnzA= 1024, nnzB=1024 +tid=0, i,j = 607,566 nnzA= 1024, nnzB=1024 +tid=0, i,j = 182,580 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1016,589 nnzA= 1024, nnzB=1024 +tid=0, i,j = 465,570 nnzA= 1024, nnzB=1024 +tid=0, i,j = 271,571 nnzA= 1024, nnzB=1024 +tid=0, i,j = 254,587 nnzA= 1024, nnzB=1024 +tid=0, i,j = 
919,571 nnzA= 1024, nnzB=1024 +tid=0, i,j = 274,579 nnzA= 1024, nnzB=1024 +tid=0, i,j = 862,562 nnzA= 1024, nnzB=1024 +tid=0, i,j = 399,584 nnzA= 1024, nnzB=1024 +tid=0, i,j = 256,576 nnzA= 1024, nnzB=1024 +tid=0, i,j = 182,562 nnzA= 1024, nnzB=1024 +tid=0, i,j = 478,601 nnzA= 1024, nnzB=1024 +tid=0, i,j = 127,599 nnzA= 1024, nnzB=1024 +tid=0, i,j = 95,590 nnzA= 1024, nnzB=1024 +tid=0, i,j = 832,608 nnzA= 1024, nnzB=1024 +tid=0, i,j = 794,610 nnzA= 1024, nnzB=1024 +tid=0, i,j = 759,611 nnzA= 1024, nnzB=1024 +tid=0, i,j = 5,620 nnzA= 1024, nnzB=1024 +tid=0, i,j = 527,592 nnzA= 1024, nnzB=1024 +tid=0, i,j = 41,621 nnzA= 1024, nnzB=1024 +tid=0, i,j = 996,619 nnzA= 1024, nnzB=1024 +tid=0, i,j = 928,616 nnzA= 1024, nnzB=1024 +tid=0, i,j = 978,591 nnzA= 1024, nnzB=1024 +tid=0, i,j = 802,590 nnzA= 1024, nnzB=1024 +tid=0, i,j = 398,613 nnzA= 1024, nnzB=1024 +tid=0, i,j = 759,603 nnzA= 1024, nnzB=1024 +tid=0, i,j = 218,602 nnzA= 1024, nnzB=1024 +tid=0, i,j = 76,605 nnzA= 1024, nnzB=1024 +tid=0, i,j = 339,600 nnzA= 1024, nnzB=1024 +tid=0, i,j = 270,604 nnzA= 1024, nnzB=1024 +tid=0, i,j = 787,595 nnzA= 1024, nnzB=1024 +tid=0, i,j = 337,620 nnzA= 1024, nnzB=1024 +tid=0, i,j = 430,594 nnzA= 1024, nnzB=1024 +tid=0, i,j = 143,593 nnzA= 1024, nnzB=1024 +tid=0, i,j = 386,614 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,615 nnzA= 1024, nnzB=1024 +tid=0, i,j = 677,596 nnzA= 1024, nnzB=1024 +tid=0, i,j = 287,609 nnzA= 1024, nnzB=1024 +tid=0, i,j = 788,598 nnzA= 1024, nnzB=1024 +tid=0, i,j = 30,617 nnzA= 1024, nnzB=1024 +tid=0, i,j = 930,606 nnzA= 1024, nnzB=1024 +tid=0, i,j = 361,618 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1006,612 nnzA= 1024, nnzB=1024 +tid=0, i,j = 716,637 nnzA= 1024, nnzB=1024 +tid=0, i,j = 341,630 nnzA= 1024, nnzB=1024 +tid=0, i,j = 396,628 nnzA= 1024, nnzB=1024 +tid=0, i,j = 957,650 nnzA= 1024, nnzB=1024 +tid=0, i,j = 44,623 nnzA= 1024, nnzB=1024 +tid=0, i,j = 19,624 nnzA= 1024, nnzB=1024 +tid=0, i,j = 617,636 nnzA= 1024, nnzB=1024 +tid=0, i,j = 113,638 nnzA= 1024, 
nnzB=1024 +tid=0, i,j = 934,643 nnzA= 1024, nnzB=1024 +tid=0, i,j = 973,652 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,649 nnzA= 1024, nnzB=1024 +tid=0, i,j = 49,631 nnzA= 1024, nnzB=1024 +tid=0, i,j = 73,632 nnzA= 1024, nnzB=1024 +tid=0, i,j = 242,624 nnzA= 1024, nnzB=1024 +tid=0, i,j = 79,623 nnzA= 1024, nnzB=1024 +tid=0, i,j = 585,631 nnzA= 1024, nnzB=1024 +tid=0, i,j = 683,625 nnzA= 1024, nnzB=1024 +tid=0, i,j = 912,634 nnzA= 1024, nnzB=1024 +tid=0, i,j = 457,641 nnzA= 1024, nnzB=1024 +tid=0, i,j = 882,629 nnzA= 1024, nnzB=1024 +tid=0, i,j = 281,651 nnzA= 1024, nnzB=1024 +tid=0, i,j = 966,623 nnzA= 1024, nnzB=1024 +tid=0, i,j = 524,624 nnzA= 1024, nnzB=1024 +tid=0, i,j = 783,647 nnzA= 1024, nnzB=1024 +tid=0, i,j = 616,639 nnzA= 1024, nnzB=1024 +tid=0, i,j = 837,640 nnzA= 1024, nnzB=1024 +tid=0, i,j = 367,638 nnzA= 1024, nnzB=1024 +tid=0, i,j = 285,637 nnzA= 1024, nnzB=1024 +tid=0, i,j = 195,648 nnzA= 1024, nnzB=1024 +tid=0, i,j = 51,626 nnzA= 1024, nnzB=1024 +tid=0, i,j = 361,627 nnzA= 1024, nnzB=1024 +tid=0, i,j = 882,635 nnzA= 1024, nnzB=1024 +tid=0, i,j = 158,664 nnzA= 1024, nnzB=1024 +tid=0, i,j = 503,666 nnzA= 1024, nnzB=1024 +tid=0, i,j = 819,674 nnzA= 1024, nnzB=1024 +tid=0, i,j = 819,672 nnzA= 1024, nnzB=1024 +tid=0, i,j = 60,653 nnzA= 1024, nnzB=1024 +tid=0, i,j = 117,682 nnzA= 1024, nnzB=1024 +tid=0, i,j = 910,655 nnzA= 1024, nnzB=1024 +tid=0, i,j = 454,675 nnzA= 1024, nnzB=1024 +tid=0, i,j = 951,667 nnzA= 1024, nnzB=1024 +tid=0, i,j = 699,681 nnzA= 1024, nnzB=1024 +tid=0, i,j = 300,669 nnzA= 1024, nnzB=1024 +tid=0, i,j = 899,679 nnzA= 1024, nnzB=1024 +tid=0, i,j = 907,684 nnzA= 1024, nnzB=1024 +tid=0, i,j = 77,665 nnzA= 1024, nnzB=1024 +tid=0, i,j = 141,671 nnzA= 1024, nnzB=1024 +tid=0, i,j = 110,683 nnzA= 1024, nnzB=1024 +tid=0, i,j = 349,656 nnzA= 1024, nnzB=1024 +tid=0, i,j = 591,660 nnzA= 1024, nnzB=1024 +tid=0, i,j = 365,668 nnzA= 1024, nnzB=1024 +tid=0, i,j = 333,653 nnzA= 1024, nnzB=1024 +tid=0, i,j = 802,678 nnzA= 1024, nnzB=1024 +tid=0, i,j = 
36,674 nnzA= 1024, nnzB=1024 +tid=0, i,j = 17,659 nnzA= 1024, nnzB=1024 +tid=0, i,j = 605,654 nnzA= 1024, nnzB=1024 +tid=0, i,j = 289,677 nnzA= 1024, nnzB=1024 +tid=0, i,j = 767,663 nnzA= 1024, nnzB=1024 +tid=0, i,j = 512,661 nnzA= 1024, nnzB=1024 +tid=0, i,j = 398,680 nnzA= 1024, nnzB=1024 +tid=0, i,j = 647,677 nnzA= 1024, nnzB=1024 +tid=0, i,j = 565,671 nnzA= 1024, nnzB=1024 +tid=0, i,j = 390,681 nnzA= 1024, nnzB=1024 +tid=0, i,j = 242,676 nnzA= 1024, nnzB=1024 +tid=0, i,j = 702,695 nnzA= 1024, nnzB=1024 +tid=0, i,j = 17,685 nnzA= 1024, nnzB=1024 +tid=0, i,j = 287,688 nnzA= 1024, nnzB=1024 +tid=0, i,j = 152,699 nnzA= 1024, nnzB=1024 +tid=0, i,j = 631,698 nnzA= 1024, nnzB=1024 +tid=0, i,j = 374,704 nnzA= 1024, nnzB=1024 +tid=0, i,j = 511,706 nnzA= 1024, nnzB=1024 +tid=0, i,j = 475,714 nnzA= 1024, nnzB=1024 +tid=0, i,j = 76,707 nnzA= 1024, nnzB=1024 +tid=0, i,j = 323,701 nnzA= 1024, nnzB=1024 +tid=0, i,j = 102,696 nnzA= 1024, nnzB=1024 +tid=0, i,j = 392,691 nnzA= 1024, nnzB=1024 +tid=0, i,j = 777,702 nnzA= 1024, nnzB=1024 +tid=0, i,j = 751,715 nnzA= 1024, nnzB=1024 +tid=0, i,j = 107,710 nnzA= 1024, nnzB=1024 +tid=0, i,j = 172,715 nnzA= 1024, nnzB=1024 +tid=0, i,j = 462,711 nnzA= 1024, nnzB=1024 +tid=0, i,j = 502,689 nnzA= 1024, nnzB=1024 +tid=0, i,j = 299,690 nnzA= 1024, nnzB=1024 +tid=0, i,j = 202,686 nnzA= 1024, nnzB=1024 +tid=0, i,j = 873,709 nnzA= 1024, nnzB=1024 +tid=0, i,j = 579,705 nnzA= 1024, nnzB=1024 +tid=0, i,j = 378,694 nnzA= 1024, nnzB=1024 +tid=0, i,j = 600,692 nnzA= 1024, nnzB=1024 +tid=0, i,j = 840,700 nnzA= 1024, nnzB=1024 +tid=0, i,j = 293,710 nnzA= 1024, nnzB=1024 +tid=0, i,j = 45,687 nnzA= 1024, nnzB=1024 +tid=0, i,j = 925,708 nnzA= 1024, nnzB=1024 +tid=0, i,j = 259,708 nnzA= 1024, nnzB=1024 +tid=0, i,j = 872,709 nnzA= 1024, nnzB=1024 +tid=0, i,j = 210,711 nnzA= 1024, nnzB=1024 +tid=0, i,j = 132,703 nnzA= 1024, nnzB=1024 +tid=0, i,j = 934,722 nnzA= 1024, nnzB=1024 +tid=0, i,j = 697,716 nnzA= 1024, nnzB=1024 +tid=0, i,j = 337,726 nnzA= 1024, 
nnzB=1024 +tid=0, i,j = 848,718 nnzA= 1024, nnzB=1024 +tid=0, i,j = 480,724 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,734 nnzA= 1024, nnzB=1024 +tid=0, i,j = 630,741 nnzA= 1024, nnzB=1024 +tid=0, i,j = 797,728 nnzA= 1024, nnzB=1024 +tid=0, i,j = 275,731 nnzA= 1024, nnzB=1024 +tid=0, i,j = 763,733 nnzA= 1024, nnzB=1024 +tid=0, i,j = 485,742 nnzA= 1024, nnzB=1024 +tid=0, i,j = 675,723 nnzA= 1024, nnzB=1024 +tid=0, i,j = 884,729 nnzA= 1024, nnzB=1024 +tid=0, i,j = 725,720 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1023,738 nnzA= 1024, nnzB=1024 +tid=0, i,j = 186,743 nnzA= 1024, nnzB=1024 +tid=0, i,j = 912,737 nnzA= 1024, nnzB=1024 +tid=0, i,j = 234,717 nnzA= 1024, nnzB=1024 +tid=0, i,j = 910,732 nnzA= 1024, nnzB=1024 +tid=0, i,j = 331,719 nnzA= 1024, nnzB=1024 +tid=0, i,j = 365,741 nnzA= 1024, nnzB=1024 +tid=0, i,j = 335,739 nnzA= 1024, nnzB=1024 +tid=0, i,j = 201,720 nnzA= 1024, nnzB=1024 +tid=0, i,j = 806,721 nnzA= 1024, nnzB=1024 +tid=0, i,j = 415,722 nnzA= 1024, nnzB=1024 +tid=0, i,j = 146,737 nnzA= 1024, nnzB=1024 +tid=0, i,j = 177,727 nnzA= 1024, nnzB=1024 +tid=0, i,j = 620,717 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,736 nnzA= 1024, nnzB=1024 +tid=0, i,j = 268,735 nnzA= 1024, nnzB=1024 +tid=0, i,j = 596,740 nnzA= 1024, nnzB=1024 +tid=0, i,j = 767,730 nnzA= 1024, nnzB=1024 +tid=0, i,j = 742,752 nnzA= 1024, nnzB=1024 +tid=0, i,j = 621,745 nnzA= 1024, nnzB=1024 +tid=0, i,j = 592,753 nnzA= 1024, nnzB=1024 +tid=0, i,j = 91,747 nnzA= 1024, nnzB=1024 +tid=0, i,j = 954,763 nnzA= 1024, nnzB=1024 +tid=0, i,j = 472,753 nnzA= 1024, nnzB=1024 +tid=0, i,j = 962,762 nnzA= 1024, nnzB=1024 +tid=0, i,j = 745,752 nnzA= 1024, nnzB=1024 +tid=0, i,j = 340,760 nnzA= 1024, nnzB=1024 +tid=0, i,j = 847,758 nnzA= 1024, nnzB=1024 +tid=0, i,j = 633,756 nnzA= 1024, nnzB=1024 +tid=0, i,j = 755,749 nnzA= 1024, nnzB=1024 +tid=0, i,j = 936,766 nnzA= 1024, nnzB=1024 +tid=0, i,j = 381,760 nnzA= 1024, nnzB=1024 +tid=0, i,j = 645,745 nnzA= 1024, nnzB=1024 +tid=0, i,j = 886,748 nnzA= 1024, nnzB=1024 +tid=0, 
i,j = 313,752 nnzA= 1024, nnzB=1024 +tid=0, i,j = 59,749 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1007,754 nnzA= 1024, nnzB=1024 +tid=0, i,j = 915,766 nnzA= 1024, nnzB=1024 +tid=0, i,j = 348,751 nnzA= 1024, nnzB=1024 +tid=0, i,j = 913,764 nnzA= 1024, nnzB=1024 +tid=0, i,j = 392,764 nnzA= 1024, nnzB=1024 +tid=0, i,j = 273,746 nnzA= 1024, nnzB=1024 +tid=0, i,j = 500,759 nnzA= 1024, nnzB=1024 +tid=0, i,j = 56,773 nnzA= 1024, nnzB=1024 +tid=0, i,j = 372,767 nnzA= 1024, nnzB=1024 +tid=0, i,j = 773,774 nnzA= 1024, nnzB=1024 +tid=0, i,j = 307,768 nnzA= 1024, nnzB=1024 +tid=0, i,j = 487,771 nnzA= 1024, nnzB=1024 +tid=0, i,j = 103,771 nnzA= 1024, nnzB=1024 +tid=0, i,j = 458,770 nnzA= 1024, nnzB=1024 +tid=0, i,j = 614,783 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,775 nnzA= 1024, nnzB=1024 +tid=0, i,j = 462,786 nnzA= 1024, nnzB=1024 +tid=0, i,j = 893,778 nnzA= 1024, nnzB=1024 +tid=0, i,j = 809,793 nnzA= 1024, nnzB=1024 +tid=0, i,j = 793,791 nnzA= 1024, nnzB=1024 +tid=0, i,j = 273,788 nnzA= 1024, nnzB=1024 +tid=0, i,j = 816,785 nnzA= 1024, nnzB=1024 +tid=0, i,j = 54,784 nnzA= 1024, nnzB=1024 +tid=0, i,j = 696,789 nnzA= 1024, nnzB=1024 +tid=0, i,j = 955,792 nnzA= 1024, nnzB=1024 +tid=0, i,j = 899,781 nnzA= 1024, nnzB=1024 +tid=0, i,j = 636,792 nnzA= 1024, nnzB=1024 +tid=0, i,j = 880,797 nnzA= 1024, nnzB=1024 +tid=0, i,j = 256,780 nnzA= 1024, nnzB=1024 +tid=0, i,j = 537,776 nnzA= 1024, nnzB=1024 +tid=0, i,j = 876,787 nnzA= 1024, nnzB=1024 +tid=0, i,j = 644,779 nnzA= 1024, nnzB=1024 +tid=0, i,j = 892,782 nnzA= 1024, nnzB=1024 +tid=0, i,j = 399,782 nnzA= 1024, nnzB=1024 +tid=0, i,j = 392,777 nnzA= 1024, nnzB=1024 +tid=0, i,j = 347,796 nnzA= 1024, nnzB=1024 +tid=0, i,j = 986,794 nnzA= 1024, nnzB=1024 +tid=0, i,j = 471,790 nnzA= 1024, nnzB=1024 +tid=0, i,j = 656,795 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1022,804 nnzA= 1024, nnzB=1024 +tid=0, i,j = 896,803 nnzA= 1024, nnzB=1024 +tid=0, i,j = 802,798 nnzA= 1024, nnzB=1024 +tid=0, i,j = 32,805 nnzA= 1024, nnzB=1024 +tid=0, i,j = 294,799 nnzA= 
1024, nnzB=1024 +tid=0, i,j = 130,801 nnzA= 1024, nnzB=1024 +tid=0, i,j = 970,800 nnzA= 1024, nnzB=1024 +tid=0, i,j = 408,815 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,805 nnzA= 1024, nnzB=1024 +tid=0, i,j = 985,818 nnzA= 1024, nnzB=1024 +tid=0, i,j = 719,809 nnzA= 1024, nnzB=1024 +tid=0, i,j = 430,817 nnzA= 1024, nnzB=1024 +tid=0, i,j = 839,821 nnzA= 1024, nnzB=1024 +tid=0, i,j = 747,822 nnzA= 1024, nnzB=1024 +tid=0, i,j = 547,811 nnzA= 1024, nnzB=1024 +tid=0, i,j = 902,816 nnzA= 1024, nnzB=1024 +tid=0, i,j = 646,810 nnzA= 1024, nnzB=1024 +tid=0, i,j = 737,809 nnzA= 1024, nnzB=1024 +tid=0, i,j = 889,806 nnzA= 1024, nnzB=1024 +tid=0, i,j = 200,813 nnzA= 1024, nnzB=1024 +tid=0, i,j = 375,812 nnzA= 1024, nnzB=1024 +tid=0, i,j = 688,819 nnzA= 1024, nnzB=1024 +tid=0, i,j = 504,807 nnzA= 1024, nnzB=1024 +tid=0, i,j = 625,827 nnzA= 1024, nnzB=1024 +tid=0, i,j = 886,824 nnzA= 1024, nnzB=1024 +tid=0, i,j = 814,826 nnzA= 1024, nnzB=1024 +tid=0, i,j = 406,825 nnzA= 1024, nnzB=1024 +tid=0, i,j = 982,831 nnzA= 1024, nnzB=1024 +tid=0, i,j = 915,830 nnzA= 1024, nnzB=1024 +tid=0, i,j = 511,829 nnzA= 1024, nnzB=1024 +tid=0, i,j = 407,828 nnzA= 1024, nnzB=1024 +tid=0, i,j = 39,823 nnzA= 1024, nnzB=1024 +tid=0, i,j = 115,835 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1003,832 nnzA= 1024, nnzB=1024 +tid=0, i,j = 218,836 nnzA= 1024, nnzB=1024 +tid=0, i,j = 362,833 nnzA= 1024, nnzB=1024 +tid=0, i,j = 295,834 nnzA= 1024, nnzB=1024 +tid=0, i,j = 999,833 nnzA= 1024, nnzB=1024 +tid=0, i,j = 136,834 nnzA= 1024, nnzB=1024 +tid=0, i,j = 565,837 nnzA= 1024, nnzB=1024 +tid=0, i,j = 159,840 nnzA= 1024, nnzB=1024 +tid=0, i,j = 103,841 nnzA= 1024, nnzB=1024 +tid=0, i,j = 541,839 nnzA= 1024, nnzB=1024 +tid=0, i,j = 636,841 nnzA= 1024, nnzB=1024 +tid=0, i,j = 533,846 nnzA= 1024, nnzB=1024 +tid=0, i,j = 483,848 nnzA= 1024, nnzB=1024 +tid=0, i,j = 741,847 nnzA= 1024, nnzB=1024 +tid=0, i,j = 711,839 nnzA= 1024, nnzB=1024 +tid=0, i,j = 529,852 nnzA= 1024, nnzB=1024 +tid=0, i,j = 524,843 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 464,849 nnzA= 1024, nnzB=1024 +tid=0, i,j = 136,842 nnzA= 1024, nnzB=1024 +tid=0, i,j = 694,845 nnzA= 1024, nnzB=1024 +tid=0, i,j = 567,851 nnzA= 1024, nnzB=1024 +tid=0, i,j = 302,850 nnzA= 1024, nnzB=1024 +tid=0, i,j = 114,844 nnzA= 1024, nnzB=1024 +tid=0, i,j = 623,853 nnzA= 1024, nnzB=1024 +tid=0, i,j = 150,852 nnzA= 1024, nnzB=1024 +tid=0, i,j = 972,865 nnzA= 1024, nnzB=1024 +tid=0, i,j = 781,858 nnzA= 1024, nnzB=1024 +tid=0, i,j = 106,855 nnzA= 1024, nnzB=1024 +tid=0, i,j = 994,858 nnzA= 1024, nnzB=1024 +tid=0, i,j = 97,866 nnzA= 1024, nnzB=1024 +tid=0, i,j = 716,859 nnzA= 1024, nnzB=1024 +tid=0, i,j = 650,857 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1022,864 nnzA= 1024, nnzB=1024 +tid=0, i,j = 508,859 nnzA= 1024, nnzB=1024 +tid=0, i,j = 21,863 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1014,856 nnzA= 1024, nnzB=1024 +tid=0, i,j = 636,862 nnzA= 1024, nnzB=1024 +tid=0, i,j = 151,857 nnzA= 1024, nnzB=1024 +tid=0, i,j = 48,867 nnzA= 1024, nnzB=1024 +tid=0, i,j = 506,870 nnzA= 1024, nnzB=1024 +tid=0, i,j = 453,871 nnzA= 1024, nnzB=1024 +tid=0, i,j = 303,868 nnzA= 1024, nnzB=1024 +tid=0, i,j = 775,872 nnzA= 1024, nnzB=1024 +tid=0, i,j = 364,869 nnzA= 1024, nnzB=1024 +tid=0, i,j = 173,873 nnzA= 1024, nnzB=1024 +tid=0, i,j = 990,877 nnzA= 1024, nnzB=1024 +tid=0, i,j = 357,876 nnzA= 1024, nnzB=1024 +tid=0, i,j = 110,888 nnzA= 1024, nnzB=1024 +tid=0, i,j = 679,891 nnzA= 1024, nnzB=1024 +tid=0, i,j = 390,896 nnzA= 1024, nnzB=1024 +tid=0, i,j = 42,897 nnzA= 1024, nnzB=1024 +tid=0, i,j = 485,874 nnzA= 1024, nnzB=1024 +tid=0, i,j = 37,895 nnzA= 1024, nnzB=1024 +tid=0, i,j = 168,875 nnzA= 1024, nnzB=1024 +tid=0, i,j = 628,890 nnzA= 1024, nnzB=1024 +tid=0, i,j = 695,895 nnzA= 1024, nnzB=1024 +tid=0, i,j = 687,885 nnzA= 1024, nnzB=1024 +tid=0, i,j = 892,889 nnzA= 1024, nnzB=1024 +tid=0, i,j = 929,883 nnzA= 1024, nnzB=1024 +tid=0, i,j = 722,877 nnzA= 1024, nnzB=1024 +tid=0, i,j = 608,882 nnzA= 1024, nnzB=1024 +tid=0, i,j = 33,894 nnzA= 1024, nnzB=1024 +tid=0, i,j = 605,901 
nnzA= 1024, nnzB=1024 +tid=0, i,j = 943,888 nnzA= 1024, nnzB=1024 +tid=0, i,j = 176,880 nnzA= 1024, nnzB=1024 +tid=0, i,j = 146,901 nnzA= 1024, nnzB=1024 +tid=0, i,j = 687,900 nnzA= 1024, nnzB=1024 +tid=0, i,j = 653,892 nnzA= 1024, nnzB=1024 +tid=0, i,j = 643,884 nnzA= 1024, nnzB=1024 +tid=0, i,j = 487,887 nnzA= 1024, nnzB=1024 +tid=0, i,j = 23,881 nnzA= 1024, nnzB=1024 +tid=0, i,j = 808,904 nnzA= 1024, nnzB=1024 +tid=0, i,j = 795,905 nnzA= 1024, nnzB=1024 +tid=0, i,j = 57,902 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,906 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1021,903 nnzA= 1024, nnzB=1024 +tid=0, i,j = 674,907 nnzA= 1024, nnzB=1024 +tid=0, i,j = 548,911 nnzA= 1024, nnzB=1024 +tid=0, i,j = 823,916 nnzA= 1024, nnzB=1024 +tid=0, i,j = 366,915 nnzA= 1024, nnzB=1024 +tid=0, i,j = 502,915 nnzA= 1024, nnzB=1024 +tid=0, i,j = 414,940 nnzA= 1024, nnzB=1024 +tid=0, i,j = 109,944 nnzA= 1024, nnzB=1024 +tid=0, i,j = 13,930 nnzA= 1024, nnzB=1024 +tid=0, i,j = 165,934 nnzA= 1024, nnzB=1024 +tid=0, i,j = 966,921 nnzA= 1024, nnzB=1024 +tid=0, i,j = 31,943 nnzA= 1024, nnzB=1024 +tid=0, i,j = 538,943 nnzA= 1024, nnzB=1024 +tid=0, i,j = 216,923 nnzA= 1024, nnzB=1024 +tid=0, i,j = 420,916 nnzA= 1024, nnzB=1024 +tid=0, i,j = 456,909 nnzA= 1024, nnzB=1024 +tid=0, i,j = 399,936 nnzA= 1024, nnzB=1024 +tid=0, i,j = 748,939 nnzA= 1024, nnzB=1024 +tid=0, i,j = 970,919 nnzA= 1024, nnzB=1024 +tid=0, i,j = 755,926 nnzA= 1024, nnzB=1024 +tid=0, i,j = 351,935 nnzA= 1024, nnzB=1024 +tid=0, i,j = 480,918 nnzA= 1024, nnzB=1024 +tid=0, i,j = 924,914 nnzA= 1024, nnzB=1024 +tid=0, i,j = 685,925 nnzA= 1024, nnzB=1024 +tid=0, i,j = 167,910 nnzA= 1024, nnzB=1024 +tid=0, i,j = 496,939 nnzA= 1024, nnzB=1024 +tid=0, i,j = 860,933 nnzA= 1024, nnzB=1024 +tid=0, i,j = 356,942 nnzA= 1024, nnzB=1024 +tid=0, i,j = 215,938 nnzA= 1024, nnzB=1024 +tid=0, i,j = 608,920 nnzA= 1024, nnzB=1024 +tid=0, i,j = 586,941 nnzA= 1024, nnzB=1024 +tid=0, i,j = 479,931 nnzA= 1024, nnzB=1024 +tid=0, i,j = 538,929 nnzA= 1024, nnzB=1024 
+tid=0, i,j = 671,945 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1017,950 nnzA= 1024, nnzB=1024 +tid=0, i,j = 221,951 nnzA= 1024, nnzB=1024 +tid=0, i,j = 628,948 nnzA= 1024, nnzB=1024 +tid=0, i,j = 823,955 nnzA= 1024, nnzB=1024 +tid=0, i,j = 370,964 nnzA= 1024, nnzB=1024 +tid=0, i,j = 667,959 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1023,967 nnzA= 1024, nnzB=1024 +tid=0, i,j = 78,960 nnzA= 1024, nnzB=1024 +tid=0, i,j = 316,949 nnzA= 1024, nnzB=1024 +tid=0, i,j = 653,956 nnzA= 1024, nnzB=1024 +tid=0, i,j = 564,969 nnzA= 1024, nnzB=1024 +tid=0, i,j = 656,957 nnzA= 1024, nnzB=1024 +tid=0, i,j = 400,968 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,962 nnzA= 1024, nnzB=1024 +tid=0, i,j = 644,958 nnzA= 1024, nnzB=1024 +tid=0, i,j = 246,946 nnzA= 1024, nnzB=1024 +tid=0, i,j = 457,955 nnzA= 1024, nnzB=1024 +tid=0, i,j = 504,965 nnzA= 1024, nnzB=1024 +tid=0, i,j = 828,961 nnzA= 1024, nnzB=1024 +tid=0, i,j = 182,947 nnzA= 1024, nnzB=1024 +tid=0, i,j = 483,966 nnzA= 1024, nnzB=1024 +tid=0, i,j = 397,963 nnzA= 1024, nnzB=1024 +tid=0, i,j = 123,978 nnzA= 1024, nnzB=1024 +tid=0, i,j = 549,972 nnzA= 1024, nnzB=1024 +tid=0, i,j = 980,976 nnzA= 1024, nnzB=1024 +tid=0, i,j = 743,985 nnzA= 1024, nnzB=1024 +tid=0, i,j = 175,981 nnzA= 1024, nnzB=1024 +tid=0, i,j = 371,980 nnzA= 1024, nnzB=1024 +tid=0, i,j = 630,972 nnzA= 1024, nnzB=1024 +tid=0, i,j = 875,971 nnzA= 1024, nnzB=1024 +tid=0, i,j = 347,977 nnzA= 1024, nnzB=1024 +tid=0, i,j = 934,974 nnzA= 1024, nnzB=1024 +tid=0, i,j = 856,970 nnzA= 1024, nnzB=1024 +tid=0, i,j = 323,986 nnzA= 1024, nnzB=1024 +tid=0, i,j = 582,984 nnzA= 1024, nnzB=1024 +tid=0, i,j = 340,987 nnzA= 1024, nnzB=1024 +tid=0, i,j = 72,985 nnzA= 1024, nnzB=1024 +tid=0, i,j = 172,988 nnzA= 1024, nnzB=1024 +tid=0, i,j = 678,998 nnzA= 1024, nnzB=1024 +tid=0, i,j = 660,991 nnzA= 1024, nnzB=1024 +tid=0, i,j = 992,994 nnzA= 1024, nnzB=1024 +tid=0, i,j = 15,992 nnzA= 1024, nnzB=1024 +tid=0, i,j = 877,1001 nnzA= 1024, nnzB=1024 +tid=0, i,j = 514,990 nnzA= 1024, nnzB=1024 +tid=0, i,j = 
585,989 nnzA= 1024, nnzB=1024 +tid=0, i,j = 167,999 nnzA= 1024, nnzB=1024 +tid=0, i,j = 58,983 nnzA= 1024, nnzB=1024 +tid=0, i,j = 170,995 nnzA= 1024, nnzB=1024 +tid=0, i,j = 946,997 nnzA= 1024, nnzB=1024 +tid=0, i,j = 449,984 nnzA= 1024, nnzB=1024 +tid=0, i,j = 87,993 nnzA= 1024, nnzB=1024 +tid=0, i,j = 120,987 nnzA= 1024, nnzB=1024 +tid=0, i,j = 531,992 nnzA= 1024, nnzB=1024 +tid=0, i,j = 674,993 nnzA= 1024, nnzB=1024 +tid=0, i,j = 667,1014 nnzA= 1024, nnzB=1024 +tid=0, i,j = 159,1008 nnzA= 1024, nnzB=1024 +tid=0, i,j = 1022,1006 nnzA= 1024, nnzB=1024 +tid=0, i,j = 67,1020 nnzA= 1024, nnzB=1024 +tid=0, i,j = 613,1015 nnzA= 1024, nnzB=1024 +tid=0, i,j = 533,1012 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,1013 nnzA= 1024, nnzB=1024 +tid=0, i,j = 151,1020 nnzA= 1024, nnzB=1024 +tid=0, i,j = 250,1004 nnzA= 1024, nnzB=1024 +tid=0, i,j = 127,1015 nnzA= 1024, nnzB=1024 +tid=0, i,j = 574,1009 nnzA= 1024, nnzB=1024 +tid=0, i,j = 254,1018 nnzA= 1024, nnzB=1024 +tid=0, i,j = 286,1002 nnzA= 1024, nnzB=1024 +tid=0, i,j = 491,1022 nnzA= 1024, nnzB=1024 +tid=0, i,j = 840,1023 nnzA= 1024, nnzB=1024 +tid=0, i,j = 287,1019 nnzA= 1024, nnzB=1024 +tid=0, i,j = 180,1017 nnzA= 1024, nnzB=1024 +tid=0, i,j = 457,1016 nnzA= 1024, nnzB=1024 +tid=0, i,j = 810,1021 nnzA= 1024, nnzB=1024 +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 13.5107ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + + (0,478) 268 + (0,574) 246 + (2,376) 235 + (5,560) 278 + (6,996) 255 + (7,183) 256 + (7,666) 248 + (8,896) 255 + (9,187) 274 + (10,446) 256 + (11,46) 270 + (11,955) 284 + (12,397) 250 + (12,953) 259 + (13,192) 278 + (14,421) 267 + (15,568) 251 + (16,788) 225 + (16,904) 246 + (17,928) 240 + (18,103) 262 + (19,821) 235 + (19,886) 236 + (20,474) 267 + (21,479) 248 + (21,975) 251 + (22,569) 255 + (23,310) 272 + (24,905) 262 + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 28.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 1 + (5,560) 0 + (6,996) 0 + (7,183) 1 + (7,666) 1 + (8,896) 0 + (9,187) 0 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 0 + (12,953) 1 + (13,192) 1 + (14,421) 0 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 0 + (18,103) 1 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 1 + (21,975) 0 + (22,569) 1 + (23,310) 0 + (24,905) 1 + ... + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + + (0,478) 268 + (0,574) 246 + (2,376) 235 + (5,560) 278 + (6,996) 255 + (7,183) 256 + (7,666) 248 + (8,896) 255 + (9,187) 274 + (10,446) 256 + (11,46) 270 + (11,955) 284 + (12,397) 250 + (12,953) 259 + (13,192) 278 + (14,421) 267 + (15,568) 251 + (16,788) 225 + (16,904) 246 + (17,928) 240 + (18,103) 262 + (19,821) 235 + (19,886) 236 + (20,474) 267 + (21,479) 248 + (21,975) 251 + (22,569) 255 + (23,310) 272 + (24,905) 262 + (25,241) 225 + (26,428) 224 + (28,107) 228 + (28,441) 274 + (30,694) 269 + (32,121) 239 + (33,81) 249 + (34,804) 269 + (36,451) 264 + (37,609) 263 + (38,138) 258 + (39,698) 263 + (40,950) 236 + (41,568) 236 + (42,324) 238 + (43,798) 244 + (46,208) 240 + (47,70) 264 + (48,336) 277 + (49,476) 254 + (50,35) 242 + (51,556) 265 + (52,999) 247 + (53,940) 264 + (54,558) 257 + (54,960) 259 + (55,979) 251 + (56,90) 305 + (57,846) 275 + (57,893) 272 + (58,35) 260 + (59,108) 255 + (60,479) 255 + (61,590) 264 + (62,771) 259 + (63,50) 267 + (64,268) 276 + (65,694) 249 + (66,719) 261 + (67,411) 239 + (68,324) 246 + (69,477) 254 + (70,539) 241 + (71,228) 235 + (72,297) 242 + (73,665) 269 + (75,855) 227 + (76,248) 235 + (77,433) 251 + (78,90) 275 + (81,754) 270 + (82,243) 286 + (84,253) 
267 + (86,104) 247 + (87,657) 255 + (89,825) 251 + (90,37) 248 + (91,234) 259 + (91,519) 276 + (92,74) 259 + (92,218) 266 + (92,690) 256 + (93,486) 268 + (94,637) 277 + (94,722) 261 + (96,564) 282 + (97,748) 245 + (99,326) 249 + (100,281) 248 + (102,609) 258 + (103,621) 277 + (104,644) 226 + (106,652) 244 + (107,239) 273 + (107,522) 234 + (108,131) 274 + (109,884) 253 + (110,402) 251 + (111,905) 256 + (112,127) 241 + (112,779) 239 + (113,278) 251 + (114,519) 264 + (115,240) 262 + (116,198) 258 + (117,219) 230 + (117,338) 251 + (118,99) 260 + (120,477) 266 + (121,554) 271 + (121,715) 291 + (122,151) 253 + (123,621) 252 + (125,177) 236 + (126,36) 275 + (128,820) 263 + (128,835) 248 + (129,660) 255 + (130,623) 246 + (130,807) 273 + (131,253) 271 + (131,355) 260 + (132,570) 264 + (133,492) 278 + (134,821) 268 + (135,295) 266 + (136,108) 263 + (137,834) 271 + (138,288) 253 + (139,284) 249 + (139,945) 286 + (140,887) 265 + (141,199) 274 + (142,87) 235 + (142,225) 261 + (143,123) 258 + (144,574) 262 + (145,552) 250 + (146,194) 244 + (146,995) 255 + (148,357) 253 + (149,949) 253 + (150,717) 255 + (151,484) 272 + (156,290) 250 + (157,714) 302 + (157,974) 274 + (158,959) 228 + (160,297) 252 + (162,601) 264 + (163,816) 271 + (164,221) 254 + (165,396) 243 + (166,801) 242 + (167,879) 234 + (168,321) 273 + (169,901) 286 + (170,612) 282 + (171,15) 253 + (172,951) 261 + (174,0) 258 + (174,595) 259 + (175,669) 254 + (176,108) 261 + (176,188) 279 + (176,614) 269 + (176,781) 255 + (177,17) 261 + (178,631) 265 + (179,932) 225 + (180,830) 258 + (182,675) 259 + (182,1001) 257 + (183,692) 240 + (184,143) 247 + (185,450) 240 + (186,779) 270 + (187,997) 256 + (188,357) 265 + (189,111) 250 + (190,990) 262 + (192,644) 269 + (192,953) 250 + (193,135) 246 + (194,137) 267 + (195,922) 276 + (197,859) 269 + (198,910) 239 + (199,531) 270 + (201,907) 253 + (202,863) 255 + (203,865) 232 + (204,614) 268 + (207,826) 239 + (208,985) 262 + (209,808) 256 + (210,659) 250 + (211,71) 236 + (211,931) 266 + 
(212,426) 291 + (213,152) 255 + (214,928) 264 + (215,268) 270 + (216,550) 268 + (217,921) 252 + (218,704) 246 + (218,922) 265 + (219,66) 232 + (220,704) 235 + (221,56) 280 + (221,551) 273 + (222,545) 243 + (223,1016) 249 + (224,721) 261 + (225,935) 270 + (226,727) 254 + (228,743) 240 + (229,535) 242 + (230,382) 245 + (231,551) 260 + (232,897) 273 + (233,570) 235 + (234,520) 246 + (235,522) 261 + (236,221) 244 + (237,755) 271 + (238,964) 243 + (239,82) 243 + (240,388) 238 + (241,500) 276 + (242,124) 240 + (242,193) 243 + (242,621) 243 + (243,300) 254 + (244,588) 256 + (244,1004) 265 + (245,494) 253 + (246,326) 262 + (247,115) 263 + (247,147) 263 + (248,233) 224 + (250,485) 259 + (251,708) 262 + (252,197) 237 + (253,485) 256 + (254,40) 243 + (254,238) 261 + (255,895) 243 + (256,114) 268 + (257,461) 250 + (257,796) 237 + (258,233) 236 + (260,884) 257 + (261,945) 279 + (262,368) 260 + (264,755) 251 + (265,124) 253 + (266,352) 255 + (267,10) 238 + (268,234) 248 + (269,400) 248 + (270,877) 259 + (270,924) 231 + (271,944) 245 + (272,67) 253 + (273,100) 273 + (274,979) 284 + (276,333) 258 + (277,377) 245 + (279,877) 252 + (280,18) 242 + (281,449) 240 + (282,179) 259 + (283,1007) 244 + (284,595) 271 + (285,32) 231 + (286,37) 245 + (287,126) 299 + (287,394) 257 + (288,848) 267 + (290,317) 257 + (291,594) 264 + (292,562) 257 + (294,466) 265 + (294,960) 262 + (295,1) 245 + (295,106) 252 + (296,109) 245 + (296,183) 243 + (296,245) 238 + (297,912) 281 + (297,1006) 269 + (299,159) 271 + (300,554) 260 + (301,774) 240 + (302,30) 273 + (303,645) 243 + (304,229) 263 + (305,622) 282 + (307,264) 267 + (308,28) 241 + (309,328) 249 + (309,627) 280 + (310,357) 234 + (311,355) 243 + (312,61) 239 + (313,758) 265 + (314,571) 268 + (315,177) 236 + (315,298) 244 + (315,741) 236 + (316,177) 226 + (316,308) 279 + (317,323) 245 + (318,595) 288 + (319,126) 281 + (320,468) 260 + (321,73) 267 + (322,235) 246 + (323,375) 233 + (323,651) 255 + (324,549) 239 + (325,306) 246 + (325,487) 279 + (326,649) 
272 + (327,704) 246 + (328,142) 271 + (329,176) 257 + (330,848) 249 + (330,965) 244 + (332,795) 265 + (334,695) 275 + (335,694) 236 + (336,775) 251 + (336,808) 231 + (337,608) 236 + (338,993) 243 + (339,680) 277 + (340,849) 251 + (341,36) 273 + (342,723) 252 + (343,678) 235 + (344,384) 255 + (344,680) 248 + (345,75) 252 + (347,996) 264 + (348,60) 280 + (348,821) 297 + (349,804) 265 + (350,282) 254 + (351,142) 272 + (351,937) 275 + (352,160) 256 + (353,536) 260 + (355,352) 264 + (356,340) 243 + (358,678) 257 + (360,679) 276 + (361,794) 255 + (361,989) 264 + (362,816) 295 + (363,206) 250 + (364,629) 267 + (365,990) 269 + (366,841) 262 + (366,971) 261 + (367,888) 315 + (368,587) 245 + (369,684) 261 + (370,270) 253 + (371,327) 257 + (372,471) 258 + (373,88) 246 + (374,669) 242 + (375,992) 241 + (376,336) 259 + (377,86) 292 + (378,882) 270 + (379,592) 264 + (380,77) 258 + (380,643) 240 + (381,1012) 255 + (382,816) 253 + (383,711) 240 + (385,670) 249 + (386,537) 255 + (387,347) 240 + (388,494) 268 + (389,328) 235 + (389,733) 237 + (390,551) 269 + (391,59) 254 + (391,600) 270 + (394,692) 247 + (396,645) 233 + (397,835) 259 + (398,107) 261 + (398,246) 264 + (399,436) 267 + (400,172) 260 + (400,382) 240 + (401,790) 245 + (402,320) 258 + (403,40) 257 + (404,641) 250 + (405,49) 269 + (405,475) 257 + (407,320) 277 + (408,61) 253 + (410,754) 239 + (411,643) 269 + (412,949) 260 + (413,94) 254 + (414,991) 257 + (415,26) 244 + (416,575) 254 + (417,366) 232 + (418,160) 258 + (418,669) 266 + (419,209) 252 + (419,285) 266 + (420,748) 277 + (421,614) 258 + (422,177) 217 + (423,873) 251 + (424,542) 258 + (425,263) 247 + (426,377) 261 + (427,149) 236 + (428,950) 246 + (429,305) 277 + (430,718) 237 + (431,51) 246 + (432,857) 246 + (434,604) 248 + (435,152) 248 + (436,356) 286 + (437,105) 235 + (438,814) 254 + (440,338) 251 + (441,982) 259 + (442,880) 244 + (443,753) 273 + (444,669) 240 + (445,952) 236 + (446,741) 264 + (447,970) 247 + (448,646) 244 + (448,744) 237 + (449,835) 286 + 
(450,579) 241 + (451,147) 258 + (451,1017) 257 + (452,868) 247 + (453,26) 262 + (454,415) 236 + (454,668) 249 + (455,43) 247 + (456,849) 270 + (456,985) 251 + (457,218) 266 + (458,510) 282 + (459,737) 250 + (460,836) 269 + (461,849) 263 + (461,917) 270 + (462,900) 262 + (463,316) 256 + (464,762) 250 + (465,355) 262 + (465,801) 254 + (466,673) 247 + (467,112) 260 + (468,288) 261 + (470,889) 248 + (471,650) 269 + (473,121) 239 + (473,127) 251 + (474,487) 265 + (475,382) 218 + (476,44) 258 + (477,342) 257 + (478,786) 267 + (480,667) 244 + (481,558) 252 + (482,680) 224 + (483,517) 270 + (484,961) 276 + (485,274) 249 + (486,1015) 262 + (487,194) 241 + (489,802) 252 + (490,811) 260 + (491,319) 254 + (492,377) 242 + (494,432) 207 + (495,809) 292 + (496,267) 255 + (496,902) 247 + (498,194) 244 + (499,952) 273 + (500,84) 259 + (501,704) 233 + (503,519) 278 + (504,510) 264 + (504,887) 262 + (505,574) 285 + (507,643) 259 + (508,449) 241 + (512,892) 253 + (513,271) 242 + (514,404) 276 + (515,758) 263 + (517,369) 271 + (518,293) 245 + (519,786) 261 + (520,270) 256 + (521,1013) 259 + (522,284) 262 + (523,632) 265 + (524,945) 273 + (525,94) 249 + (525,362) 257 + (526,52) 282 + (527,61) 242 + (528,294) 274 + (529,145) 248 + (529,998) 261 + (530,112) 253 + (531,908) 249 + (533,674) 252 + (534,505) 227 + (535,660) 261 + (535,776) 265 + (536,500) 274 + (537,799) 258 + (538,492) 241 + (538,861) 258 + (540,245) 272 + (542,137) 268 + (545,658) 246 + (546,213) 272 + (547,767) 255 + (547,912) 279 + (547,1018) 252 + (548,46) 261 + (548,697) 265 + (549,602) 257 + (550,927) 277 + (552,710) 271 + (553,391) 244 + (554,351) 227 + (555,10) 235 + (556,26) 238 + (557,910) 255 + (558,552) 261 + (560,792) 265 + (561,597) 257 + (562,182) 264 + (562,862) 261 + (563,877) 276 + (564,310) 259 + (564,609) 251 + (565,490) 251 + (566,564) 263 + (566,607) 251 + (569,872) 279 + (570,465) 263 + (571,271) 271 + (571,919) 243 + (572,630) 237 + (574,603) 272 + (576,256) 284 + (579,274) 236 + (580,182) 252 + 
(581,445) 251 + (582,177) 196 + (583,118) 280 + (584,399) 250 + (585,433) 244 + (587,254) 237 + (588,914) 254 + (589,1016) 269 + (590,95) 277 + (590,802) 279 + (591,978) 265 + (592,527) 245 + (593,143) 276 + (594,430) 232 + (595,787) 261 + (596,677) 247 + (598,788) 250 + (599,127) 228 + (600,339) 249 + (601,478) 271 + (602,218) 271 + (603,759) 242 + (604,270) 247 + (605,76) 243 + (606,930) 257 + (608,832) 267 + (609,287) 265 + (610,794) 256 + (611,759) 247 + (612,1006) 282 + (613,398) 239 + (614,386) 259 + (615,115) 264 + (616,928) 254 + (617,30) 260 + (618,361) 243 + (619,996) 222 + (620,5) 248 + (620,337) 256 + (621,41) 251 + (623,44) 267 + (623,79) 252 + (623,966) 263 + (624,19) 270 + (624,242) 258 + (624,524) 244 + (625,683) 288 + (626,51) 242 + (627,361) 257 + (628,396) 248 + (629,882) 260 + (630,341) 237 + (631,49) 238 + (631,585) 234 + (632,73) 268 + (634,912) 278 + (635,882) 266 + (636,617) 252 + (637,285) 251 + (637,716) 275 + (638,113) 274 + (638,367) 254 + (639,616) 258 + (640,837) 234 + (641,457) 251 + (643,934) 265 + (647,783) 240 + (648,195) 270 + (649,614) 239 + (650,957) 265 + (651,281) 252 + (652,973) 267 + (653,60) 249 + (653,333) 268 + (654,605) 272 + (655,910) 234 + (656,349) 255 + (659,17) 250 + (660,591) 275 + (661,512) 277 + (663,767) 258 + (664,158) 224 + (665,77) 239 + (666,503) 248 + (667,951) 261 + (668,365) 278 + (669,300) 273 + (671,141) 272 + (671,565) 285 + (672,819) 223 + (674,36) 249 + (674,819) 249 + (675,454) 234 + (676,242) 263 + (677,289) 278 + (677,647) 255 + (678,802) 240 + (679,899) 242 + (680,398) 266 + (681,390) 266 + (681,699) 233 + (682,117) 246 + (683,110) 265 + (684,907) 243 + (685,17) 239 + (686,202) 255 + (687,45) 222 + (688,287) 242 + (689,502) 257 + (690,299) 252 + (691,392) 256 + (692,600) 264 + (694,378) 243 + (695,702) 271 + (696,102) 251 + (698,631) 252 + (699,152) 272 + (700,840) 267 + (701,323) 239 + (702,777) 232 + (703,132) 264 + (704,374) 261 + (705,579) 254 + (706,511) 233 + (707,76) 261 + (708,259) 269 + 
(708,925) 266 + (709,872) 269 + (709,873) 265 + (710,107) 235 + (710,293) 266 + (711,210) 257 + (711,462) 267 + (714,475) 245 + (715,172) 253 + (715,751) 241 + (716,697) 249 + (717,234) 239 + (717,620) 244 + (718,848) 260 + (719,331) 265 + (720,201) 255 + (720,725) 272 + (721,806) 262 + (722,415) 239 + (722,934) 262 + (723,675) 249 + (724,480) 259 + (726,337) 259 + (727,177) 237 + (728,797) 272 + (729,884) 241 + (730,767) 249 + (731,275) 275 + (732,910) 231 + (733,763) 283 + (734,574) 263 + (735,268) 253 + (736,115) 218 + (737,146) 238 + (737,912) 249 + (738,1023) 252 + (739,335) 259 + (740,596) 233 + (741,365) 270 + (741,630) 256 + (742,485) 250 + (743,186) 252 + (745,621) 250 + (745,645) 246 + (746,273) 276 + (747,91) 256 + (748,886) 245 + (749,59) 273 + (749,755) 254 + (751,348) 253 + (752,313) 255 + (752,742) 277 + (752,745) 260 + (753,472) 260 + (753,592) 249 + (754,1007) 234 + (756,633) 255 + (758,847) 268 + (759,500) 253 + (760,340) 251 + (760,381) 270 + (762,962) 270 + (763,954) 236 + (764,392) 236 + (764,913) 258 + (766,915) 265 + (766,936) 259 + (767,372) 266 + (768,307) 266 + (770,458) 265 + (771,103) 241 + (771,487) 264 + (773,56) 248 + (774,773) 259 + (775,115) 266 + (776,537) 254 + (777,392) 258 + (778,893) 287 + (779,644) 270 + (780,256) 263 + (781,899) 261 + (782,399) 251 + (782,892) 277 + (783,614) 237 + (784,54) 231 + (785,816) 261 + (786,462) 248 + (787,876) 262 + (788,273) 276 + (789,696) 260 + (790,471) 251 + (791,793) 261 + (792,636) 264 + (792,955) 263 + (793,809) 269 + (794,986) 249 + (795,656) 253 + (796,347) 246 + (797,880) 264 + (798,802) 256 + (799,294) 267 + (800,970) 231 + (801,130) 244 + (803,896) 256 + (804,1022) 257 + (805,32) 232 + (805,479) 257 + (806,889) 245 + (807,504) 251 + (809,719) 272 + (809,737) 270 + (810,646) 241 + (811,547) 238 + (812,375) 262 + (813,200) 257 + (815,408) 252 + (816,902) 256 + (817,430) 241 + (818,985) 256 + (819,688) 254 + (821,839) 257 + (822,747) 262 + (823,39) 259 + (824,886) 241 + (825,406) 247 + 
(826,814) 242 + (827,625) 266 + (828,407) 260 + (829,511) 254 + (830,915) 263 + (831,982) 266 + (832,1003) 246 + (833,362) 259 + (833,999) 258 + (834,136) 263 + (834,295) 267 + (835,115) 281 + (836,218) 272 + (837,565) 285 + (839,541) 280 + (839,711) 273 + (840,159) 251 + (841,103) 240 + (841,636) 271 + (842,136) 257 + (843,524) 254 + (844,114) 260 + (845,694) 268 + (846,533) 274 + (847,741) 243 + (848,483) 269 + (849,464) 257 + (850,302) 245 + (851,567) 248 + (852,150) 262 + (852,529) 258 + (853,623) 234 + (855,106) 265 + (856,1014) 261 + (857,151) 270 + (857,650) 280 + (858,781) 242 + (858,994) 242 + (859,508) 255 + (859,716) 284 + (862,636) 241 + (863,21) 242 + (864,1022) 242 + (865,972) 264 + (866,97) 243 + (867,48) 235 + (868,303) 249 + (869,364) 255 + (870,506) 241 + (871,453) 255 + (872,775) 259 + (873,173) 269 + (874,485) 249 + (875,168) 249 + (876,357) 243 + (877,722) 255 + (877,990) 267 + (880,176) 291 + (881,23) 268 + (882,608) 248 + (883,929) 251 + (884,643) 247 + (885,687) 259 + (887,487) 257 + (888,110) 266 + (888,943) 264 + (889,892) 267 + (890,628) 261 + (891,679) 258 + (892,653) 254 + (894,33) 258 + (895,37) 266 + (895,695) 269 + (896,390) 269 + (897,42) 265 + (900,687) 281 + (901,146) 241 + (901,605) 261 + (902,57) 230 + (903,1021) 250 + (904,808) 237 + (905,795) 271 + (906,479) 257 + (907,674) 277 + (909,456) 250 + (910,167) 265 + (911,548) 248 + (914,924) 250 + (915,366) 253 + (915,502) 238 + (916,420) 273 + (916,823) 247 + (918,480) 248 + (919,970) 259 + (920,608) 246 + (921,966) 230 + (923,216) 247 + (925,685) 275 + (926,755) 274 + (929,538) 268 + (930,13) 259 + (931,479) 250 + (933,860) 261 + (934,165) 250 + (935,351) 233 + (936,399) 244 + (938,215) 264 + (939,496) 276 + (939,748) 262 + (940,414) 242 + (941,586) 265 + (942,356) 274 + (943,31) 263 + (943,538) 262 + (944,109) 249 + (945,671) 258 + (946,246) 255 + (947,182) 262 + (948,628) 262 + (949,316) 238 + (950,1017) 259 + (951,221) 250 + (955,457) 237 + (955,823) 241 + (956,653) 258 + 
(957,656) 255 + (958,644) 238 + (959,667) 246 + (960,78) 247 + (961,828) 252 + (962,877) 269 + (963,397) 284 + (964,370) 262 + (965,504) 244 + (966,483) 246 + (967,1023) 246 + (968,400) 233 + (969,564) 254 + (970,856) 257 + (971,875) 243 + (972,549) 259 + (972,630) 240 + (974,934) 281 + (976,980) 247 + (977,347) 230 + (978,123) 258 + (980,371) 245 + (981,175) 258 + (983,58) 252 + (984,449) 248 + (984,582) 246 + (985,72) 253 + (985,743) 237 + (986,323) 248 + (987,120) 241 + (987,340) 266 + (988,172) 251 + (989,585) 241 + (990,514) 271 + (991,660) 256 + (992,15) 283 + (992,531) 277 + (993,87) 267 + (993,674) 252 + (994,992) 244 + (995,170) 269 + (997,946) 270 + (998,678) 251 + (999,167) 258 + (1001,877) 250 + (1002,286) 242 + (1004,250) 259 + (1006,1022) 248 + (1008,159) 264 + (1009,574) 258 + (1012,533) 270 + (1013,574) 273 + (1014,667) 247 + (1015,127) 244 + (1015,613) 245 + (1016,457) 246 + (1017,180) 267 + (1018,254) 237 + (1019,287) 248 + (1020,67) 261 + (1020,151) 248 + (1021,810) 239 + (1022,491) 268 + (1023,840) 264 + + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + C_actual, 1024 entries, memory: 40.2 KB + + (0,478) 268 + (0,574) 246 + (2,376) 235 + (5,560) 278 + (6,996) 255 + (7,183) 256 + (7,666) 248 + (8,896) 255 + (9,187) 274 + (10,446) 256 + (11,46) 270 + (11,955) 284 + (12,397) 250 + (12,953) 259 + (13,192) 278 + (14,421) 267 + (15,568) 251 + (16,788) 225 + (16,904) 246 + (17,928) 240 + (18,103) 262 + (19,821) 235 + (19,886) 236 + (20,474) 267 + (21,479) 248 + (21,975) 251 + (22,569) 255 + (23,310) 272 + (24,905) 262 + (25,241) 225 + (26,428) 224 + (28,107) 228 + (28,441) 274 + (30,694) 269 + (32,121) 239 + (33,81) 249 + (34,804) 269 + (36,451) 264 + (37,609) 263 + (38,138) 258 + (39,698) 263 + (40,950) 236 + (41,568) 236 + (42,324) 238 + (43,798) 244 + (46,208) 240 + (47,70) 264 + (48,336) 277 + (49,476) 254 + (50,35) 242 + (51,556) 265 + (52,999) 247 + (53,940) 264 + (54,558) 257 + (54,960) 259 + (55,979) 251 + (56,90) 305 + (57,846) 275 + 
(57,893) 272 + (58,35) 260 + (59,108) 255 + (60,479) 255 + (61,590) 264 + (62,771) 259 + (63,50) 267 + (64,268) 276 + (65,694) 249 + (66,719) 261 + (67,411) 239 + (68,324) 246 + (69,477) 254 + (70,539) 241 + (71,228) 235 + (72,297) 242 + (73,665) 269 + (75,855) 227 + (76,248) 235 + (77,433) 251 + (78,90) 275 + (81,754) 270 + (82,243) 286 + (84,253) 267 + (86,104) 247 + (87,657) 255 + (89,825) 251 + (90,37) 248 + (91,234) 259 + (91,519) 276 + (92,74) 259 + (92,218) 266 + (92,690) 256 + (93,486) 268 + (94,637) 277 + (94,722) 261 + (96,564) 282 + (97,748) 245 + (99,326) 249 + (100,281) 248 + (102,609) 258 + (103,621) 277 + (104,644) 226 + (106,652) 244 + (107,239) 273 + (107,522) 234 + (108,131) 274 + (109,884) 253 + (110,402) 251 + (111,905) 256 + (112,127) 241 + (112,779) 239 + (113,278) 251 + (114,519) 264 + (115,240) 262 + (116,198) 258 + (117,219) 230 + (117,338) 251 + (118,99) 260 + (120,477) 266 + (121,554) 271 + (121,715) 291 + (122,151) 253 + (123,621) 252 + (125,177) 236 + (126,36) 275 + (128,820) 263 + (128,835) 248 + (129,660) 255 + (130,623) 246 + (130,807) 273 + (131,253) 271 + (131,355) 260 + (132,570) 264 + (133,492) 278 + (134,821) 268 + (135,295) 266 + (136,108) 263 + (137,834) 271 + (138,288) 253 + (139,284) 249 + (139,945) 286 + (140,887) 265 + (141,199) 274 + (142,87) 235 + (142,225) 261 + (143,123) 258 + (144,574) 262 + (145,552) 250 + (146,194) 244 + (146,995) 255 + (148,357) 253 + (149,949) 253 + (150,717) 255 + (151,484) 272 + (156,290) 250 + (157,714) 302 + (157,974) 274 + (158,959) 228 + (160,297) 252 + (162,601) 264 + (163,816) 271 + (164,221) 254 + (165,396) 243 + (166,801) 242 + (167,879) 234 + (168,321) 273 + (169,901) 286 + (170,612) 282 + (171,15) 253 + (172,951) 261 + (174,0) 258 + (174,595) 259 + (175,669) 254 + (176,108) 261 + (176,188) 279 + (176,614) 269 + (176,781) 255 + (177,17) 261 + (178,631) 265 + (179,932) 225 + (180,830) 258 + (182,675) 259 + (182,1001) 257 + (183,692) 240 + (184,143) 247 + (185,450) 240 + (186,779) 270 + 
(187,997) 256 + (188,357) 265 + (189,111) 250 + (190,990) 262 + (192,644) 269 + (192,953) 250 + (193,135) 246 + (194,137) 267 + (195,922) 276 + (197,859) 269 + (198,910) 239 + (199,531) 270 + (201,907) 253 + (202,863) 255 + (203,865) 232 + (204,614) 268 + (207,826) 239 + (208,985) 262 + (209,808) 256 + (210,659) 250 + (211,71) 236 + (211,931) 266 + (212,426) 291 + (213,152) 255 + (214,928) 264 + (215,268) 270 + (216,550) 268 + (217,921) 252 + (218,704) 246 + (218,922) 265 + (219,66) 232 + (220,704) 235 + (221,56) 280 + (221,551) 273 + (222,545) 243 + (223,1016) 249 + (224,721) 261 + (225,935) 270 + (226,727) 254 + (228,743) 240 + (229,535) 242 + (230,382) 245 + (231,551) 260 + (232,897) 273 + (233,570) 235 + (234,520) 246 + (235,522) 261 + (236,221) 244 + (237,755) 271 + (238,964) 243 + (239,82) 243 + (240,388) 238 + (241,500) 276 + (242,124) 240 + (242,193) 243 + (242,621) 243 + (243,300) 254 + (244,588) 256 + (244,1004) 265 + (245,494) 253 + (246,326) 262 + (247,115) 263 + (247,147) 263 + (248,233) 224 + (250,485) 259 + (251,708) 262 + (252,197) 237 + (253,485) 256 + (254,40) 243 + (254,238) 261 + (255,895) 243 + (256,114) 268 + (257,461) 250 + (257,796) 237 + (258,233) 236 + (260,884) 257 + (261,945) 279 + (262,368) 260 + (264,755) 251 + (265,124) 253 + (266,352) 255 + (267,10) 238 + (268,234) 248 + (269,400) 248 + (270,877) 259 + (270,924) 231 + (271,944) 245 + (272,67) 253 + (273,100) 273 + (274,979) 284 + (276,333) 258 + (277,377) 245 + (279,877) 252 + (280,18) 242 + (281,449) 240 + (282,179) 259 + (283,1007) 244 + (284,595) 271 + (285,32) 231 + (286,37) 245 + (287,126) 299 + (287,394) 257 + (288,848) 267 + (290,317) 257 + (291,594) 264 + (292,562) 257 + (294,466) 265 + (294,960) 262 + (295,1) 245 + (295,106) 252 + (296,109) 245 + (296,183) 243 + (296,245) 238 + (297,912) 281 + (297,1006) 269 + (299,159) 271 + (300,554) 260 + (301,774) 240 + (302,30) 273 + (303,645) 243 + (304,229) 263 + (305,622) 282 + (307,264) 267 + (308,28) 241 + (309,328) 249 + (309,627) 
280 + (310,357) 234 + (311,355) 243 + (312,61) 239 + (313,758) 265 + (314,571) 268 + (315,177) 236 + (315,298) 244 + (315,741) 236 + (316,177) 226 + (316,308) 279 + (317,323) 245 + (318,595) 288 + (319,126) 281 + (320,468) 260 + (321,73) 267 + (322,235) 246 + (323,375) 233 + (323,651) 255 + (324,549) 239 + (325,306) 246 + (325,487) 279 + (326,649) 272 + (327,704) 246 + (328,142) 271 + (329,176) 257 + (330,848) 249 + (330,965) 244 + (332,795) 265 + (334,695) 275 + (335,694) 236 + (336,775) 251 + (336,808) 231 + (337,608) 236 + (338,993) 243 + (339,680) 277 + (340,849) 251 + (341,36) 273 + (342,723) 252 + (343,678) 235 + (344,384) 255 + (344,680) 248 + (345,75) 252 + (347,996) 264 + (348,60) 280 + (348,821) 297 + (349,804) 265 + (350,282) 254 + (351,142) 272 + (351,937) 275 + (352,160) 256 + (353,536) 260 + (355,352) 264 + (356,340) 243 + (358,678) 257 + (360,679) 276 + (361,794) 255 + (361,989) 264 + (362,816) 295 + (363,206) 250 + (364,629) 267 + (365,990) 269 + (366,841) 262 + (366,971) 261 + (367,888) 315 + (368,587) 245 + (369,684) 261 + (370,270) 253 + (371,327) 257 + (372,471) 258 + (373,88) 246 + (374,669) 242 + (375,992) 241 + (376,336) 259 + (377,86) 292 + (378,882) 270 + (379,592) 264 + (380,77) 258 + (380,643) 240 + (381,1012) 255 + (382,816) 253 + (383,711) 240 + (385,670) 249 + (386,537) 255 + (387,347) 240 + (388,494) 268 + (389,328) 235 + (389,733) 237 + (390,551) 269 + (391,59) 254 + (391,600) 270 + (394,692) 247 + (396,645) 233 + (397,835) 259 + (398,107) 261 + (398,246) 264 + (399,436) 267 + (400,172) 260 + (400,382) 240 + (401,790) 245 + (402,320) 258 + (403,40) 257 + (404,641) 250 + (405,49) 269 + (405,475) 257 + (407,320) 277 + (408,61) 253 + (410,754) 239 + (411,643) 269 + (412,949) 260 + (413,94) 254 + (414,991) 257 + (415,26) 244 + (416,575) 254 + (417,366) 232 + (418,160) 258 + (418,669) 266 + (419,209) 252 + (419,285) 266 + (420,748) 277 + (421,614) 258 + (422,177) 217 + (423,873) 251 + (424,542) 258 + (425,263) 247 + (426,377) 261 + 
(427,149) 236 + (428,950) 246 + (429,305) 277 + (430,718) 237 + (431,51) 246 + (432,857) 246 + (434,604) 248 + (435,152) 248 + (436,356) 286 + (437,105) 235 + (438,814) 254 + (440,338) 251 + (441,982) 259 + (442,880) 244 + (443,753) 273 + (444,669) 240 + (445,952) 236 + (446,741) 264 + (447,970) 247 + (448,646) 244 + (448,744) 237 + (449,835) 286 + (450,579) 241 + (451,147) 258 + (451,1017) 257 + (452,868) 247 + (453,26) 262 + (454,415) 236 + (454,668) 249 + (455,43) 247 + (456,849) 270 + (456,985) 251 + (457,218) 266 + (458,510) 282 + (459,737) 250 + (460,836) 269 + (461,849) 263 + (461,917) 270 + (462,900) 262 + (463,316) 256 + (464,762) 250 + (465,355) 262 + (465,801) 254 + (466,673) 247 + (467,112) 260 + (468,288) 261 + (470,889) 248 + (471,650) 269 + (473,121) 239 + (473,127) 251 + (474,487) 265 + (475,382) 218 + (476,44) 258 + (477,342) 257 + (478,786) 267 + (480,667) 244 + (481,558) 252 + (482,680) 224 + (483,517) 270 + (484,961) 276 + (485,274) 249 + (486,1015) 262 + (487,194) 241 + (489,802) 252 + (490,811) 260 + (491,319) 254 + (492,377) 242 + (494,432) 207 + (495,809) 292 + (496,267) 255 + (496,902) 247 + (498,194) 244 + (499,952) 273 + (500,84) 259 + (501,704) 233 + (503,519) 278 + (504,510) 264 + (504,887) 262 + (505,574) 285 + (507,643) 259 + (508,449) 241 + (512,892) 253 + (513,271) 242 + (514,404) 276 + (515,758) 263 + (517,369) 271 + (518,293) 245 + (519,786) 261 + (520,270) 256 + (521,1013) 259 + (522,284) 262 + (523,632) 265 + (524,945) 273 + (525,94) 249 + (525,362) 257 + (526,52) 282 + (527,61) 242 + (528,294) 274 + (529,145) 248 + (529,998) 261 + (530,112) 253 + (531,908) 249 + (533,674) 252 + (534,505) 227 + (535,660) 261 + (535,776) 265 + (536,500) 274 + (537,799) 258 + (538,492) 241 + (538,861) 258 + (540,245) 272 + (542,137) 268 + (545,658) 246 + (546,213) 272 + (547,767) 255 + (547,912) 279 + (547,1018) 252 + (548,46) 261 + (548,697) 265 + (549,602) 257 + (550,927) 277 + (552,710) 271 + (553,391) 244 + (554,351) 227 + (555,10) 235 + 
(556,26) 238 + (557,910) 255 + (558,552) 261 + (560,792) 265 + (561,597) 257 + (562,182) 264 + (562,862) 261 + (563,877) 276 + (564,310) 259 + (564,609) 251 + (565,490) 251 + (566,564) 263 + (566,607) 251 + (569,872) 279 + (570,465) 263 + (571,271) 271 + (571,919) 243 + (572,630) 237 + (574,603) 272 + (576,256) 284 + (579,274) 236 + (580,182) 252 + (581,445) 251 + (582,177) 196 + (583,118) 280 + (584,399) 250 + (585,433) 244 + (587,254) 237 + (588,914) 254 + (589,1016) 269 + (590,95) 277 + (590,802) 279 + (591,978) 265 + (592,527) 245 + (593,143) 276 + (594,430) 232 + (595,787) 261 + (596,677) 247 + (598,788) 250 + (599,127) 228 + (600,339) 249 + (601,478) 271 + (602,218) 271 + (603,759) 242 + (604,270) 247 + (605,76) 243 + (606,930) 257 + (608,832) 267 + (609,287) 265 + (610,794) 256 + (611,759) 247 + (612,1006) 282 + (613,398) 239 + (614,386) 259 + (615,115) 264 + (616,928) 254 + (617,30) 260 + (618,361) 243 + (619,996) 222 + (620,5) 248 + (620,337) 256 + (621,41) 251 + (623,44) 267 + (623,79) 252 + (623,966) 263 + (624,19) 270 + (624,242) 258 + (624,524) 244 + (625,683) 288 + (626,51) 242 + (627,361) 257 + (628,396) 248 + (629,882) 260 + (630,341) 237 + (631,49) 238 + (631,585) 234 + (632,73) 268 + (634,912) 278 + (635,882) 266 + (636,617) 252 + (637,285) 251 + (637,716) 275 + (638,113) 274 + (638,367) 254 + (639,616) 258 + (640,837) 234 + (641,457) 251 + (643,934) 265 + (647,783) 240 + (648,195) 270 + (649,614) 239 + (650,957) 265 + (651,281) 252 + (652,973) 267 + (653,60) 249 + (653,333) 268 + (654,605) 272 + (655,910) 234 + (656,349) 255 + (659,17) 250 + (660,591) 275 + (661,512) 277 + (663,767) 258 + (664,158) 224 + (665,77) 239 + (666,503) 248 + (667,951) 261 + (668,365) 278 + (669,300) 273 + (671,141) 272 + (671,565) 285 + (672,819) 223 + (674,36) 249 + (674,819) 249 + (675,454) 234 + (676,242) 263 + (677,289) 278 + (677,647) 255 + (678,802) 240 + (679,899) 242 + (680,398) 266 + (681,390) 266 + (681,699) 233 + (682,117) 246 + (683,110) 265 + (684,907) 243 
+ (685,17) 239 + (686,202) 255 + (687,45) 222 + (688,287) 242 + (689,502) 257 + (690,299) 252 + (691,392) 256 + (692,600) 264 + (694,378) 243 + (695,702) 271 + (696,102) 251 + (698,631) 252 + (699,152) 272 + (700,840) 267 + (701,323) 239 + (702,777) 232 + (703,132) 264 + (704,374) 261 + (705,579) 254 + (706,511) 233 + (707,76) 261 + (708,259) 269 + (708,925) 266 + (709,872) 269 + (709,873) 265 + (710,107) 235 + (710,293) 266 + (711,210) 257 + (711,462) 267 + (714,475) 245 + (715,172) 253 + (715,751) 241 + (716,697) 249 + (717,234) 239 + (717,620) 244 + (718,848) 260 + (719,331) 265 + (720,201) 255 + (720,725) 272 + (721,806) 262 + (722,415) 239 + (722,934) 262 + (723,675) 249 + (724,480) 259 + (726,337) 259 + (727,177) 237 + (728,797) 272 + (729,884) 241 + (730,767) 249 + (731,275) 275 + (732,910) 231 + (733,763) 283 + (734,574) 263 + (735,268) 253 + (736,115) 218 + (737,146) 238 + (737,912) 249 + (738,1023) 252 + (739,335) 259 + (740,596) 233 + (741,365) 270 + (741,630) 256 + (742,485) 250 + (743,186) 252 + (745,621) 250 + (745,645) 246 + (746,273) 276 + (747,91) 256 + (748,886) 245 + (749,59) 273 + (749,755) 254 + (751,348) 253 + (752,313) 255 + (752,742) 277 + (752,745) 260 + (753,472) 260 + (753,592) 249 + (754,1007) 234 + (756,633) 255 + (758,847) 268 + (759,500) 253 + (760,340) 251 + (760,381) 270 + (762,962) 270 + (763,954) 236 + (764,392) 236 + (764,913) 258 + (766,915) 265 + (766,936) 259 + (767,372) 266 + (768,307) 266 + (770,458) 265 + (771,103) 241 + (771,487) 264 + (773,56) 248 + (774,773) 259 + (775,115) 266 + (776,537) 254 + (777,392) 258 + (778,893) 287 + (779,644) 270 + (780,256) 263 + (781,899) 261 + (782,399) 251 + (782,892) 277 + (783,614) 237 + (784,54) 231 + (785,816) 261 + (786,462) 248 + (787,876) 262 + (788,273) 276 + (789,696) 260 + (790,471) 251 + (791,793) 261 + (792,636) 264 + (792,955) 263 + (793,809) 269 + (794,986) 249 + (795,656) 253 + (796,347) 246 + (797,880) 264 + (798,802) 256 + (799,294) 267 + (800,970) 231 + (801,130) 244 + 
(803,896) 256 + (804,1022) 257 + (805,32) 232 + (805,479) 257 + (806,889) 245 + (807,504) 251 + (809,719) 272 + (809,737) 270 + (810,646) 241 + (811,547) 238 + (812,375) 262 + (813,200) 257 + (815,408) 252 + (816,902) 256 + (817,430) 241 + (818,985) 256 + (819,688) 254 + (821,839) 257 + (822,747) 262 + (823,39) 259 + (824,886) 241 + (825,406) 247 + (826,814) 242 + (827,625) 266 + (828,407) 260 + (829,511) 254 + (830,915) 263 + (831,982) 266 + (832,1003) 246 + (833,362) 259 + (833,999) 258 + (834,136) 263 + (834,295) 267 + (835,115) 281 + (836,218) 272 + (837,565) 285 + (839,541) 280 + (839,711) 273 + (840,159) 251 + (841,103) 240 + (841,636) 271 + (842,136) 257 + (843,524) 254 + (844,114) 260 + (845,694) 268 + (846,533) 274 + (847,741) 243 + (848,483) 269 + (849,464) 257 + (850,302) 245 + (851,567) 248 + (852,150) 262 + (852,529) 258 + (853,623) 234 + (855,106) 265 + (856,1014) 261 + (857,151) 270 + (857,650) 280 + (858,781) 242 + (858,994) 242 + (859,508) 255 + (859,716) 284 + (862,636) 241 + (863,21) 242 + (864,1022) 242 + (865,972) 264 + (866,97) 243 + (867,48) 235 + (868,303) 249 + (869,364) 255 + (870,506) 241 + (871,453) 255 + (872,775) 259 + (873,173) 269 + (874,485) 249 + (875,168) 249 + (876,357) 243 + (877,722) 255 + (877,990) 267 + (880,176) 291 + (881,23) 268 + (882,608) 248 + (883,929) 251 + (884,643) 247 + (885,687) 259 + (887,487) 257 + (888,110) 266 + (888,943) 264 + (889,892) 267 + (890,628) 261 + (891,679) 258 + (892,653) 254 + (894,33) 258 + (895,37) 266 + (895,695) 269 + (896,390) 269 + (897,42) 265 + (900,687) 281 + (901,146) 241 + (901,605) 261 + (902,57) 230 + (903,1021) 250 + (904,808) 237 + (905,795) 271 + (906,479) 257 + (907,674) 277 + (909,456) 250 + (910,167) 265 + (911,548) 248 + (914,924) 250 + (915,366) 253 + (915,502) 238 + (916,420) 273 + (916,823) 247 + (918,480) 248 + (919,970) 259 + (920,608) 246 + (921,966) 230 + (923,216) 247 + (925,685) 275 + (926,755) 274 + (929,538) 268 + (930,13) 259 + (931,479) 250 + (933,860) 261 + 
(934,165) 250 + (935,351) 233 + (936,399) 244 + (938,215) 264 + (939,496) 276 + (939,748) 262 + (940,414) 242 + (941,586) 265 + (942,356) 274 + (943,31) 263 + (943,538) 262 + (944,109) 249 + (945,671) 258 + (946,246) 255 + (947,182) 262 + (948,628) 262 + (949,316) 238 + (950,1017) 259 + (951,221) 250 + (955,457) 237 + (955,823) 241 + (956,653) 258 + (957,656) 255 + (958,644) 238 + (959,667) 246 + (960,78) 247 + (961,828) 252 + (962,877) 269 + (963,397) 284 + (964,370) 262 + (965,504) 244 + (966,483) 246 + (967,1023) 246 + (968,400) 233 + (969,564) 254 + (970,856) 257 + (971,875) 243 + (972,549) 259 + (972,630) 240 + (974,934) 281 + (976,980) 247 + (977,347) 230 + (978,123) 258 + (980,371) 245 + (981,175) 258 + (983,58) 252 + (984,449) 248 + (984,582) 246 + (985,72) 253 + (985,743) 237 + (986,323) 248 + (987,120) 241 + (987,340) 266 + (988,172) 251 + (989,585) 241 + (990,514) 271 + (991,660) 256 + (992,15) 283 + (992,531) 277 + (993,87) 267 + (993,674) 252 + (994,992) 244 + (995,170) 269 + (997,946) 270 + (998,678) 251 + (999,167) 258 + (1001,877) 250 + (1002,286) 242 + (1004,250) 259 + (1006,1022) 248 + (1008,159) 264 + (1009,574) 258 + (1012,533) 270 + (1013,574) 273 + (1014,667) 247 + (1015,127) 244 + (1015,613) 245 + (1016,457) 246 + (1017,180) 267 + (1018,254) 237 + (1019,287) 248 + (1020,67) 261 + (1020,151) 248 + (1021,810) 239 + (1022,491) 268 + (1023,840) 264 + + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS double matrix, sparse by row + Diff actual, 1024 entries, memory: 32.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 0 + (5,560) 0 + (6,996) 0 + (7,183) 0 + (7,666) 0 + (8,896) 0 + (9,187) 0 + (10,446) 0 + (11,46) 0 + (11,955) 0 + (12,397) 0 + (12,953) 0 + (13,192) 0 + (14,421) 0 + (15,568) 0 + (16,788) 0 + (16,904) 0 + (17,928) 0 + (18,103) 0 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 0 + (21,975) 0 + (22,569) 0 + (23,310) 0 + (24,905) 0 + (25,241) 0 + (26,428) 0 + (28,107) 0 + (28,441) 0 + (30,694) 0 + (32,121) 0 + 
(33,81) 0 + (34,804) 0 + (36,451) 0 + (37,609) 0 + (38,138) 0 + (39,698) 0 + (40,950) 0 + (41,568) 0 + (42,324) 0 + (43,798) 0 + (46,208) 0 + (47,70) 0 + (48,336) 0 + (49,476) 0 + (50,35) 0 + (51,556) 0 + (52,999) 0 + (53,940) 0 + (54,558) 0 + (54,960) 0 + (55,979) 0 + (56,90) 0 + (57,846) 0 + (57,893) 0 + (58,35) 0 + (59,108) 0 + (60,479) 0 + (61,590) 0 + (62,771) 0 + (63,50) 0 + (64,268) 0 + (65,694) 0 + (66,719) 0 + (67,411) 0 + (68,324) 0 + (69,477) 0 + (70,539) 0 + (71,228) 0 + (72,297) 0 + (73,665) 0 + (75,855) 0 + (76,248) 0 + (77,433) 0 + (78,90) 0 + (81,754) 0 + (82,243) 0 + (84,253) 0 + (86,104) 0 + (87,657) 0 + (89,825) 0 + (90,37) 0 + (91,234) 0 + (91,519) 0 + (92,74) 0 + (92,218) 0 + (92,690) 0 + (93,486) 0 + (94,637) 0 + (94,722) 0 + (96,564) 0 + (97,748) 0 + (99,326) 0 + (100,281) 0 + (102,609) 0 + (103,621) 0 + (104,644) 0 + (106,652) 0 + (107,239) 0 + (107,522) 0 + (108,131) 0 + (109,884) 0 + (110,402) 0 + (111,905) 0 + (112,127) 0 + (112,779) 0 + (113,278) 0 + (114,519) 0 + (115,240) 0 + (116,198) 0 + (117,219) 0 + (117,338) 0 + (118,99) 0 + (120,477) 0 + (121,554) 0 + (121,715) 0 + (122,151) 0 + (123,621) 0 + (125,177) 0 + (126,36) 0 + (128,820) 0 + (128,835) 0 + (129,660) 0 + (130,623) 0 + (130,807) 0 + (131,253) 0 + (131,355) 0 + (132,570) 0 + (133,492) 0 + (134,821) 0 + (135,295) 0 + (136,108) 0 + (137,834) 0 + (138,288) 0 + (139,284) 0 + (139,945) 0 + (140,887) 0 + (141,199) 0 + (142,87) 0 + (142,225) 0 + (143,123) 0 + (144,574) 0 + (145,552) 0 + (146,194) 0 + (146,995) 0 + (148,357) 0 + (149,949) 0 + (150,717) 0 + (151,484) 0 + (156,290) 0 + (157,714) 0 + (157,974) 0 + (158,959) 0 + (160,297) 0 + (162,601) 0 + (163,816) 0 + (164,221) 0 + (165,396) 0 + (166,801) 0 + (167,879) 0 + (168,321) 0 + (169,901) 0 + (170,612) 0 + (171,15) 0 + (172,951) 0 + (174,0) 0 + (174,595) 0 + (175,669) 0 + (176,108) 0 + (176,188) 0 + (176,614) 0 + (176,781) 0 + (177,17) 0 + (178,631) 0 + (179,932) 0 + (180,830) 0 + (182,675) 0 + (182,1001) 0 + (183,692) 0 + 
(184,143) 0 + (185,450) 0 + (186,779) 0 + (187,997) 0 + (188,357) 0 + (189,111) 0 + (190,990) 0 + (192,644) 0 + (192,953) 0 + (193,135) 0 + (194,137) 0 + (195,922) 0 + (197,859) 0 + (198,910) 0 + (199,531) 0 + (201,907) 0 + (202,863) 0 + (203,865) 0 + (204,614) 0 + (207,826) 0 + (208,985) 0 + (209,808) 0 + (210,659) 0 + (211,71) 0 + (211,931) 0 + (212,426) 0 + (213,152) 0 + (214,928) 0 + (215,268) 0 + (216,550) 0 + (217,921) 0 + (218,704) 0 + (218,922) 0 + (219,66) 0 + (220,704) 0 + (221,56) 0 + (221,551) 0 + (222,545) 0 + (223,1016) 0 + (224,721) 0 + (225,935) 0 + (226,727) 0 + (228,743) 0 + (229,535) 0 + (230,382) 0 + (231,551) 0 + (232,897) 0 + (233,570) 0 + (234,520) 0 + (235,522) 0 + (236,221) 0 + (237,755) 0 + (238,964) 0 + (239,82) 0 + (240,388) 0 + (241,500) 0 + (242,124) 0 + (242,193) 0 + (242,621) 0 + (243,300) 0 + (244,588) 0 + (244,1004) 0 + (245,494) 0 + (246,326) 0 + (247,115) 0 + (247,147) 0 + (248,233) 0 + (250,485) 0 + (251,708) 0 + (252,197) 0 + (253,485) 0 + (254,40) 0 + (254,238) 0 + (255,895) 0 + (256,114) 0 + (257,461) 0 + (257,796) 0 + (258,233) 0 + (260,884) 0 + (261,945) 0 + (262,368) 0 + (264,755) 0 + (265,124) 0 + (266,352) 0 + (267,10) 0 + (268,234) 0 + (269,400) 0 + (270,877) 0 + (270,924) 0 + (271,944) 0 + (272,67) 0 + (273,100) 0 + (274,979) 0 + (276,333) 0 + (277,377) 0 + (279,877) 0 + (280,18) 0 + (281,449) 0 + (282,179) 0 + (283,1007) 0 + (284,595) 0 + (285,32) 0 + (286,37) 0 + (287,126) 0 + (287,394) 0 + (288,848) 0 + (290,317) 0 + (291,594) 0 + (292,562) 0 + (294,466) 0 + (294,960) 0 + (295,1) 0 + (295,106) 0 + (296,109) 0 + (296,183) 0 + (296,245) 0 + (297,912) 0 + (297,1006) 0 + (299,159) 0 + (300,554) 0 + (301,774) 0 + (302,30) 0 + (303,645) 0 + (304,229) 0 + (305,622) 0 + (307,264) 0 + (308,28) 0 + (309,328) 0 + (309,627) 0 + (310,357) 0 + (311,355) 0 + (312,61) 0 + (313,758) 0 + (314,571) 0 + (315,177) 0 + (315,298) 0 + (315,741) 0 + (316,177) 0 + (316,308) 0 + (317,323) 0 + (318,595) 0 + (319,126) 0 + (320,468) 0 + (321,73) 
0 + (322,235) 0 + (323,375) 0 + (323,651) 0 + (324,549) 0 + (325,306) 0 + (325,487) 0 + (326,649) 0 + (327,704) 0 + (328,142) 0 + (329,176) 0 + (330,848) 0 + (330,965) 0 + (332,795) 0 + (334,695) 0 + (335,694) 0 + (336,775) 0 + (336,808) 0 + (337,608) 0 + (338,993) 0 + (339,680) 0 + (340,849) 0 + (341,36) 0 + (342,723) 0 + (343,678) 0 + (344,384) 0 + (344,680) 0 + (345,75) 0 + (347,996) 0 + (348,60) 0 + (348,821) 0 + (349,804) 0 + (350,282) 0 + (351,142) 0 + (351,937) 0 + (352,160) 0 + (353,536) 0 + (355,352) 0 + (356,340) 0 + (358,678) 0 + (360,679) 0 + (361,794) 0 + (361,989) 0 + (362,816) 0 + (363,206) 0 + (364,629) 0 + (365,990) 0 + (366,841) 0 + (366,971) 0 + (367,888) 0 + (368,587) 0 + (369,684) 0 + (370,270) 0 + (371,327) 0 + (372,471) 0 + (373,88) 0 + (374,669) 0 + (375,992) 0 + (376,336) 0 + (377,86) 0 + (378,882) 0 + (379,592) 0 + (380,77) 0 + (380,643) 0 + (381,1012) 0 + (382,816) 0 + (383,711) 0 + (385,670) 0 + (386,537) 0 + (387,347) 0 + (388,494) 0 + (389,328) 0 + (389,733) 0 + (390,551) 0 + (391,59) 0 + (391,600) 0 + (394,692) 0 + (396,645) 0 + (397,835) 0 + (398,107) 0 + (398,246) 0 + (399,436) 0 + (400,172) 0 + (400,382) 0 + (401,790) 0 + (402,320) 0 + (403,40) 0 + (404,641) 0 + (405,49) 0 + (405,475) 0 + (407,320) 0 + (408,61) 0 + (410,754) 0 + (411,643) 0 + (412,949) 0 + (413,94) 0 + (414,991) 0 + (415,26) 0 + (416,575) 0 + (417,366) 0 + (418,160) 0 + (418,669) 0 + (419,209) 0 + (419,285) 0 + (420,748) 0 + (421,614) 0 + (422,177) 0 + (423,873) 0 + (424,542) 0 + (425,263) 0 + (426,377) 0 + (427,149) 0 + (428,950) 0 + (429,305) 0 + (430,718) 0 + (431,51) 0 + (432,857) 0 + (434,604) 0 + (435,152) 0 + (436,356) 0 + (437,105) 0 + (438,814) 0 + (440,338) 0 + (441,982) 0 + (442,880) 0 + (443,753) 0 + (444,669) 0 + (445,952) 0 + (446,741) 0 + (447,970) 0 + (448,646) 0 + (448,744) 0 + (449,835) 0 + (450,579) 0 + (451,147) 0 + (451,1017) 0 + (452,868) 0 + (453,26) 0 + (454,415) 0 + (454,668) 0 + (455,43) 0 + (456,849) 0 + (456,985) 0 + (457,218) 0 + 
(458,510) 0 + (459,737) 0 + (460,836) 0 + (461,849) 0 + (461,917) 0 + (462,900) 0 + (463,316) 0 + (464,762) 0 + (465,355) 0 + (465,801) 0 + (466,673) 0 + (467,112) 0 + (468,288) 0 + (470,889) 0 + (471,650) 0 + (473,121) 0 + (473,127) 0 + (474,487) 0 + (475,382) 0 + (476,44) 0 + (477,342) 0 + (478,786) 0 + (480,667) 0 + (481,558) 0 + (482,680) 0 + (483,517) 0 + (484,961) 0 + (485,274) 0 + (486,1015) 0 + (487,194) 0 + (489,802) 0 + (490,811) 0 + (491,319) 0 + (492,377) 0 + (494,432) 0 + (495,809) 0 + (496,267) 0 + (496,902) 0 + (498,194) 0 + (499,952) 0 + (500,84) 0 + (501,704) 0 + (503,519) 0 + (504,510) 0 + (504,887) 0 + (505,574) 0 + (507,643) 0 + (508,449) 0 + (512,892) 0 + (513,271) 0 + (514,404) 0 + (515,758) 0 + (517,369) 0 + (518,293) 0 + (519,786) 0 + (520,270) 0 + (521,1013) 0 + (522,284) 0 + (523,632) 0 + (524,945) 0 + (525,94) 0 + (525,362) 0 + (526,52) 0 + (527,61) 0 + (528,294) 0 + (529,145) 0 + (529,998) 0 + (530,112) 0 + (531,908) 0 + (533,674) 0 + (534,505) 0 + (535,660) 0 + (535,776) 0 + (536,500) 0 + (537,799) 0 + (538,492) 0 + (538,861) 0 + (540,245) 0 + (542,137) 0 + (545,658) 0 + (546,213) 0 + (547,767) 0 + (547,912) 0 + (547,1018) 0 + (548,46) 0 + (548,697) 0 + (549,602) 0 + (550,927) 0 + (552,710) 0 + (553,391) 0 + (554,351) 0 + (555,10) 0 + (556,26) 0 + (557,910) 0 + (558,552) 0 + (560,792) 0 + (561,597) 0 + (562,182) 0 + (562,862) 0 + (563,877) 0 + (564,310) 0 + (564,609) 0 + (565,490) 0 + (566,564) 0 + (566,607) 0 + (569,872) 0 + (570,465) 0 + (571,271) 0 + (571,919) 0 + (572,630) 0 + (574,603) 0 + (576,256) 0 + (579,274) 0 + (580,182) 0 + (581,445) 0 + (582,177) 0 + (583,118) 0 + (584,399) 0 + (585,433) 0 + (587,254) 0 + (588,914) 0 + (589,1016) 0 + (590,95) 0 + (590,802) 0 + (591,978) 0 + (592,527) 0 + (593,143) 0 + (594,430) 0 + (595,787) 0 + (596,677) 0 + (598,788) 0 + (599,127) 0 + (600,339) 0 + (601,478) 0 + (602,218) 0 + (603,759) 0 + (604,270) 0 + (605,76) 0 + (606,930) 0 + (608,832) 0 + (609,287) 0 + (610,794) 0 + (611,759) 0 + 
(612,1006) 0 + (613,398) 0 + (614,386) 0 + (615,115) 0 + (616,928) 0 + (617,30) 0 + (618,361) 0 + (619,996) 0 + (620,5) 0 + (620,337) 0 + (621,41) 0 + (623,44) 0 + (623,79) 0 + (623,966) 0 + (624,19) 0 + (624,242) 0 + (624,524) 0 + (625,683) 0 + (626,51) 0 + (627,361) 0 + (628,396) 0 + (629,882) 0 + (630,341) 0 + (631,49) 0 + (631,585) 0 + (632,73) 0 + (634,912) 0 + (635,882) 0 + (636,617) 0 + (637,285) 0 + (637,716) 0 + (638,113) 0 + (638,367) 0 + (639,616) 0 + (640,837) 0 + (641,457) 0 + (643,934) 0 + (647,783) 0 + (648,195) 0 + (649,614) 0 + (650,957) 0 + (651,281) 0 + (652,973) 0 + (653,60) 0 + (653,333) 0 + (654,605) 0 + (655,910) 0 + (656,349) 0 + (659,17) 0 + (660,591) 0 + (661,512) 0 + (663,767) 0 + (664,158) 0 + (665,77) 0 + (666,503) 0 + (667,951) 0 + (668,365) 0 + (669,300) 0 + (671,141) 0 + (671,565) 0 + (672,819) 0 + (674,36) 0 + (674,819) 0 + (675,454) 0 + (676,242) 0 + (677,289) 0 + (677,647) 0 + (678,802) 0 + (679,899) 0 + (680,398) 0 + (681,390) 0 + (681,699) 0 + (682,117) 0 + (683,110) 0 + (684,907) 0 + (685,17) 0 + (686,202) 0 + (687,45) 0 + (688,287) 0 + (689,502) 0 + (690,299) 0 + (691,392) 0 + (692,600) 0 + (694,378) 0 + (695,702) 0 + (696,102) 0 + (698,631) 0 + (699,152) 0 + (700,840) 0 + (701,323) 0 + (702,777) 0 + (703,132) 0 + (704,374) 0 + (705,579) 0 + (706,511) 0 + (707,76) 0 + (708,259) 0 + (708,925) 0 + (709,872) 0 + (709,873) 0 + (710,107) 0 + (710,293) 0 + (711,210) 0 + (711,462) 0 + (714,475) 0 + (715,172) 0 + (715,751) 0 + (716,697) 0 + (717,234) 0 + (717,620) 0 + (718,848) 0 + (719,331) 0 + (720,201) 0 + (720,725) 0 + (721,806) 0 + (722,415) 0 + (722,934) 0 + (723,675) 0 + (724,480) 0 + (726,337) 0 + (727,177) 0 + (728,797) 0 + (729,884) 0 + (730,767) 0 + (731,275) 0 + (732,910) 0 + (733,763) 0 + (734,574) 0 + (735,268) 0 + (736,115) 0 + (737,146) 0 + (737,912) 0 + (738,1023) 0 + (739,335) 0 + (740,596) 0 + (741,365) 0 + (741,630) 0 + (742,485) 0 + (743,186) 0 + (745,621) 0 + (745,645) 0 + (746,273) 0 + (747,91) 0 + (748,886) 0 + 
(749,59) 0 + (749,755) 0 + (751,348) 0 + (752,313) 0 + (752,742) 0 + (752,745) 0 + (753,472) 0 + (753,592) 0 + (754,1007) 0 + (756,633) 0 + (758,847) 0 + (759,500) 0 + (760,340) 0 + (760,381) 0 + (762,962) 0 + (763,954) 0 + (764,392) 0 + (764,913) 0 + (766,915) 0 + (766,936) 0 + (767,372) 0 + (768,307) 0 + (770,458) 0 + (771,103) 0 + (771,487) 0 + (773,56) 0 + (774,773) 0 + (775,115) 0 + (776,537) 0 + (777,392) 0 + (778,893) 0 + (779,644) 0 + (780,256) 0 + (781,899) 0 + (782,399) 0 + (782,892) 0 + (783,614) 0 + (784,54) 0 + (785,816) 0 + (786,462) 0 + (787,876) 0 + (788,273) 0 + (789,696) 0 + (790,471) 0 + (791,793) 0 + (792,636) 0 + (792,955) 0 + (793,809) 0 + (794,986) 0 + (795,656) 0 + (796,347) 0 + (797,880) 0 + (798,802) 0 + (799,294) 0 + (800,970) 0 + (801,130) 0 + (803,896) 0 + (804,1022) 0 + (805,32) 0 + (805,479) 0 + (806,889) 0 + (807,504) 0 + (809,719) 0 + (809,737) 0 + (810,646) 0 + (811,547) 0 + (812,375) 0 + (813,200) 0 + (815,408) 0 + (816,902) 0 + (817,430) 0 + (818,985) 0 + (819,688) 0 + (821,839) 0 + (822,747) 0 + (823,39) 0 + (824,886) 0 + (825,406) 0 + (826,814) 0 + (827,625) 0 + (828,407) 0 + (829,511) 0 + (830,915) 0 + (831,982) 0 + (832,1003) 0 + (833,362) 0 + (833,999) 0 + (834,136) 0 + (834,295) 0 + (835,115) 0 + (836,218) 0 + (837,565) 0 + (839,541) 0 + (839,711) 0 + (840,159) 0 + (841,103) 0 + (841,636) 0 + (842,136) 0 + (843,524) 0 + (844,114) 0 + (845,694) 0 + (846,533) 0 + (847,741) 0 + (848,483) 0 + (849,464) 0 + (850,302) 0 + (851,567) 0 + (852,150) 0 + (852,529) 0 + (853,623) 0 + (855,106) 0 + (856,1014) 0 + (857,151) 0 + (857,650) 0 + (858,781) 0 + (858,994) 0 + (859,508) 0 + (859,716) 0 + (862,636) 0 + (863,21) 0 + (864,1022) 0 + (865,972) 0 + (866,97) 0 + (867,48) 0 + (868,303) 0 + (869,364) 0 + (870,506) 0 + (871,453) 0 + (872,775) 0 + (873,173) 0 + (874,485) 0 + (875,168) 0 + (876,357) 0 + (877,722) 0 + (877,990) 0 + (880,176) 0 + (881,23) 0 + (882,608) 0 + (883,929) 0 + (884,643) 0 + (885,687) 0 + (887,487) 0 + (888,110) 0 + 
(888,943) 0 + (889,892) 0 + (890,628) 0 + (891,679) 0 + (892,653) 0 + (894,33) 0 + (895,37) 0 + (895,695) 0 + (896,390) 0 + (897,42) 0 + (900,687) 0 + (901,146) 0 + (901,605) 0 + (902,57) 0 + (903,1021) 0 + (904,808) 0 + (905,795) 0 + (906,479) 0 + (907,674) 0 + (909,456) 0 + (910,167) 0 + (911,548) 0 + (914,924) 0 + (915,366) 0 + (915,502) 0 + (916,420) 0 + (916,823) 0 + (918,480) 0 + (919,970) 0 + (920,608) 0 + (921,966) 0 + (923,216) 0 + (925,685) 0 + (926,755) 0 + (929,538) 0 + (930,13) 0 + (931,479) 0 + (933,860) 0 + (934,165) 0 + (935,351) 0 + (936,399) 0 + (938,215) 0 + (939,496) 0 + (939,748) 0 + (940,414) 0 + (941,586) 0 + (942,356) 0 + (943,31) 0 + (943,538) 0 + (944,109) 0 + (945,671) 0 + (946,246) 0 + (947,182) 0 + (948,628) 0 + (949,316) 0 + (950,1017) 0 + (951,221) 0 + (955,457) 0 + (955,823) 0 + (956,653) 0 + (957,656) 0 + (958,644) 0 + (959,667) 0 + (960,78) 0 + (961,828) 0 + (962,877) 0 + (963,397) 0 + (964,370) 0 + (965,504) 0 + (966,483) 0 + (967,1023) 0 + (968,400) 0 + (969,564) 0 + (970,856) 0 + (971,875) 0 + (972,549) 0 + (972,630) 0 + (974,934) 0 + (976,980) 0 + (977,347) 0 + (978,123) 0 + (980,371) 0 + (981,175) 0 + (983,58) 0 + (984,449) 0 + (984,582) 0 + (985,72) 0 + (985,743) 0 + (986,323) 0 + (987,120) 0 + (987,340) 0 + (988,172) 0 + (989,585) 0 + (990,514) 0 + (991,660) 0 + (992,15) 0 + (992,531) 0 + (993,87) 0 + (993,674) 0 + (994,992) 0 + (995,170) 0 + (997,946) 0 + (998,678) 0 + (999,167) 0 + (1001,877) 0 + (1002,286) 0 + (1004,250) 0 + (1006,1022) 0 + (1008,159) 0 + (1009,574) 0 + (1012,533) 0 + (1013,574) 0 + (1014,667) 0 + (1015,127) 0 + (1015,613) 0 + (1016,457) 0 + (1017,180) 0 + (1018,254) 0 + (1019,287) 0 + (1020,67) 0 + (1020,151) 0 + (1021,810) 0 + (1022,491) 0 + (1023,840) 0 + + + 1024x1024 GraphBLAS bool matrix, sparse by row + T actual, 1024 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (2,376) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 
1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (18,103) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + ... + work:1024 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 1048576 values, invsparse = 1 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling dense + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 524288 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 1048576 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 2097152 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes + rmm_wrap_alloc 4194304 bytes + rmm_wrap_alloc 8388608 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 5120 values, invsparse = 205 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +5120 nonzeroes left to fill.. +2026 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes +1024 slots to fill +all pairs to bucket 5, no filling +done assigning buckets +Building semiring factgory + rmm_wrap_alloc 256 bytes + calling stringify semiring: 0x7f1ff53ef300 +inside enumify: 0x7f1ff53ef300 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 5, no filling +done assigning buckets +bucket 5 has 1024 dots to do +LAUNCHING BUCKET CODE: 5 +Confiring spdnINside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_spdn +found memory-cached prog GB_jit_AxB_dot3_phase3_spdn + got kernel instance AxB_dot3_phase3_spdn_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_spdn_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_spdnIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<32,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 3.78778ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 131 + + (0,478) 1 + (0,574) 2 + (2,376) zombie + (5,560) 3 + (6,996) 2 + (7,183) 0 + (7,666) 0 + (8,896) 2 + (9,187) 0 + (10,446) 2 + (11,46) 2 + (11,955) 2 + (12,397) 1 + (12,953) 0 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 0 + (17,928) 0 + (18,103) zombie + (19,821) 1 + (19,886) 0 + (20,474) 4 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 2 + (24,905) 0 + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 28.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 1 + (5,560) 0 + (6,996) 0 + (7,183) 1 + (7,666) 1 + (8,896) 0 + (9,187) 0 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 0 + (12,953) 1 + (13,192) 1 + (14,421) 0 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 0 + (18,103) 1 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 1 + (21,975) 0 + (22,569) 1 + (23,310) 0 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 893 entries, memory: 28.2 KB + + (0,478) 1 + (0,574) 2 + (5,560) 3 + (6,996) 2 + (7,183) 0 + (7,666) 0 + (8,896) 2 + (9,187) 0 + (10,446) 2 + (11,46) 2 + (11,955) 2 + (12,397) 1 + (12,953) 0 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 0 + (17,928) 0 + (19,821) 1 + (19,886) 0 + (20,474) 4 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 2 + (24,905) 0 + (25,241) 0 + (26,428) 0 + (28,107) 2 + (32,121) 0 + (33,81) 2 + (37,609) 2 + (39,698) 1 + (41,568) 1 + (42,324) 0 + (43,798) 1 + (46,208) 0 + (47,70) 1 + (48,336) 1 + (49,476) 1 + (50,35) 0 + (51,556) 0 + (52,999) 1 + (53,940) 1 + (54,558) 0 + (54,960) 1 + (55,979) 1 + (56,90) 2 + (57,846) 3 + (57,893) 0 + (58,35) 0 + (59,108) 3 + (60,479) 1 + (61,590) 2 + (62,771) 0 + (63,50) 0 + (64,268) 3 + (66,719) 2 + (67,411) 2 + (68,324) 0 + (69,477) 0 + (70,539) 1 + (71,228) 3 + (72,297) 3 + (73,665) 0 + (75,855) 0 + (76,248) 0 + (77,433) 4 + (78,90) 3 + (81,754) 4 + (82,243) 2 + (84,253) 1 + (86,104) 3 + (87,657) 0 + (89,825) 2 + (90,37) 4 + (91,234) 1 + (91,519) 1 + (92,74) 3 + (92,218) 1 + (92,690) 1 + (93,486) 2 + (94,637) 0 + (94,722) 1 + (96,564) 1 + (99,326) 2 + (100,281) 1 + (102,609) 2 + (104,644) 0 + (106,652) 1 + (107,239) 0 
+ (107,522) 2 + (108,131) 1 + (109,884) 2 + (110,402) 3 + (111,905) 2 + (112,127) 0 + (112,779) 0 + (113,278) 0 + (114,519) 1 + (115,240) 4 + (117,219) 0 + (117,338) 2 + (118,99) 4 + (120,477) 1 + (121,554) 3 + (121,715) 3 + (122,151) 3 + (125,177) 5 + (128,820) 6 + (129,660) 0 + (130,623) 1 + (131,253) 1 + (131,355) 1 + (133,492) 1 + (134,821) 0 + (135,295) 2 + (136,108) 3 + (137,834) 2 + (138,288) 1 + (139,284) 2 + (139,945) 0 + (141,199) 1 + (142,87) 4 + (142,225) 1 + (143,123) 0 + (144,574) 0 + (146,194) 3 + (148,357) 0 + (149,949) 1 + (150,717) 2 + (151,484) 2 + (156,290) 2 + (157,714) 0 + (157,974) 1 + (160,297) 1 + (162,601) 2 + (163,816) 3 + (164,221) 1 + (165,396) 1 + (166,801) 3 + (167,879) 3 + (168,321) 0 + (169,901) 3 + (172,951) 1 + (176,108) 1 + (176,188) 1 + (176,614) 2 + (176,781) 1 + (178,631) 1 + (179,932) 2 + (180,830) 3 + (182,675) 1 + (182,1001) 2 + (183,692) 1 + (184,143) 2 + (185,450) 1 + (186,779) 0 + (187,997) 3 + (188,357) 1 + (189,111) 2 + (190,990) 1 + (192,644) 0 + (192,953) 0 + (193,135) 1 + (194,137) 4 + (195,922) 4 + (197,859) 1 + (198,910) 1 + (199,531) 3 + (201,907) 0 + (202,863) 1 + (203,865) 4 + (204,614) 3 + (207,826) 1 + (208,985) 2 + (209,808) 3 + (211,71) 4 + (211,931) 3 + (212,426) 0 + (213,152) 0 + (214,928) 0 + (215,268) 3 + (216,550) 3 + (217,921) 0 + (218,704) 2 + (218,922) 2 + (219,66) 1 + (220,704) 2 + (221,56) 1 + (221,551) 2 + (222,545) 1 + (223,1016) 2 + (224,721) 1 + (225,935) 1 + (226,727) 0 + (228,743) 4 + (229,535) 2 + (231,551) 3 + (232,897) 2 + (234,520) 2 + (235,522) 2 + (236,221) 3 + (237,755) 2 + (238,964) 2 + (239,82) 0 + (240,388) 0 + (241,500) 2 + (242,124) 3 + (242,193) 0 + (243,300) 0 + (244,588) 0 + (244,1004) 3 + (245,494) 0 + (246,326) 1 + (247,115) 1 + (247,147) 1 + (248,233) 0 + (250,485) 6 + (251,708) 0 + (252,197) 1 + (253,485) 5 + (254,40) 3 + (254,238) 0 + (255,895) 3 + (256,114) 0 + (257,461) 2 + (257,796) 0 + (258,233) 1 + (260,884) 2 + (261,945) 1 + (262,368) 2 + (264,755) 1 + (265,124) 1 + 
(266,352) 3 + (267,10) 1 + (268,234) 1 + (269,400) 1 + (270,877) 0 + (270,924) 0 + (271,944) 0 + (272,67) 3 + (273,100) 1 + (274,979) 4 + (276,333) 2 + (277,377) 0 + (279,877) 1 + (280,18) 3 + (281,449) 3 + (282,179) 2 + (283,1007) 2 + (285,32) 1 + (286,37) 2 + (287,394) 3 + (288,848) 0 + (290,317) 0 + (291,594) 1 + (294,466) 2 + (294,960) 0 + (295,1) 0 + (295,106) 2 + (296,109) 2 + (296,183) 0 + (296,245) 0 + (297,912) 1 + (299,159) 1 + (300,554) 1 + (301,774) 1 + (302,30) 1 + (303,645) 1 + (304,229) 1 + (305,622) 0 + (307,264) 3 + (308,28) 0 + (309,328) 4 + (309,627) 0 + (310,357) 1 + (311,355) 1 + (312,61) 2 + (314,571) 3 + (315,177) 3 + (315,741) 0 + (316,177) 3 + (316,308) 4 + (320,468) 1 + (321,73) 0 + (322,235) 2 + (323,375) 3 + (323,651) 3 + (324,549) 2 + (325,306) 1 + (325,487) 1 + (326,649) 2 + (327,704) 0 + (329,176) 2 + (330,848) 1 + (330,965) 2 + (332,795) 1 + (334,695) 1 + (336,808) 4 + (337,608) 1 + (338,993) 2 + (339,680) 0 + (340,849) 1 + (342,723) 2 + (343,678) 2 + (344,384) 3 + (344,680) 0 + (345,75) 0 + (347,996) 3 + (348,60) 3 + (348,821) 1 + (350,282) 1 + (352,160) 2 + (353,536) 1 + (355,352) 5 + (356,340) 2 + (358,678) 2 + (360,679) 1 + (361,794) 0 + (361,989) 3 + (362,816) 2 + (363,206) 4 + (364,629) 0 + (365,990) 0 + (366,841) 1 + (366,971) 0 + (367,888) 2 + (368,587) 0 + (369,684) 3 + (370,270) 1 + (372,471) 1 + (373,88) 1 + (375,992) 2 + (376,336) 3 + (377,86) 1 + (378,882) 1 + (379,592) 2 + (380,77) 2 + (380,643) 2 + (381,1012) 2 + (382,816) 2 + (383,711) 2 + (385,670) 1 + (386,537) 1 + (387,347) 2 + (388,494) 1 + (389,328) 3 + (390,551) 1 + (391,59) 2 + (391,600) 1 + (394,692) 4 + (396,645) 2 + (398,107) 3 + (398,246) 2 + (399,436) 3 + (400,172) 0 + (401,790) 3 + (402,320) 2 + (403,40) 2 + (404,641) 0 + (405,49) 0 + (405,475) 1 + (407,320) 3 + (408,61) 4 + (410,754) 3 + (411,643) 2 + (412,949) 1 + (413,94) 5 + (415,26) 1 + (416,575) 0 + (417,366) 3 + (418,160) 0 + (419,209) 1 + (421,614) 1 + (422,177) 2 + (423,873) 1 + (424,542) 3 + 
(425,263) 0 + (426,377) 0 + (427,149) 0 + (429,305) 0 + (430,718) 1 + (431,51) 0 + (432,857) 2 + (434,604) 0 + (435,152) 2 + (436,356) 1 + (437,105) 3 + (440,338) 0 + (441,982) 2 + (442,880) 1 + (443,753) 1 + (446,741) 0 + (448,646) 0 + (448,744) 2 + (450,579) 1 + (451,147) 0 + (451,1017) 0 + (452,868) 3 + (453,26) 1 + (454,415) 1 + (454,668) 0 + (455,43) 0 + (456,849) 1 + (456,985) 2 + (457,218) 2 + (458,510) 4 + (459,737) 2 + (460,836) 2 + (461,849) 0 + (461,917) 2 + (462,900) 1 + (463,316) 1 + (464,762) 1 + (465,355) 1 + (465,801) 1 + (466,673) 0 + (468,288) 1 + (470,889) 2 + (471,650) 1 + (473,121) 1 + (473,127) 2 + (474,487) 0 + (476,44) 0 + (477,342) 1 + (480,667) 1 + (481,558) 0 + (482,680) 1 + (483,517) 1 + (484,961) 1 + (485,274) 0 + (486,1015) 3 + (487,194) 1 + (489,802) 2 + (490,811) 1 + (491,319) 4 + (492,377) 1 + (494,432) 1 + (495,809) 0 + (496,267) 2 + (496,902) 1 + (498,194) 1 + (500,84) 0 + (501,704) 2 + (503,519) 2 + (504,510) 3 + (505,574) 1 + (507,643) 3 + (508,449) 3 + (512,892) 3 + (513,271) 2 + (517,369) 1 + (518,293) 2 + (520,270) 1 + (521,1013) 1 + (522,284) 1 + (524,945) 1 + (525,94) 5 + (525,362) 2 + (526,52) 1 + (527,61) 3 + (529,998) 0 + (531,908) 1 + (533,674) 4 + (535,660) 1 + (535,776) 1 + (536,500) 3 + (537,799) 2 + (538,492) 2 + (538,861) 1 + (540,245) 0 + (542,137) 2 + (545,658) 0 + (546,213) 1 + (547,767) 1 + (547,912) 3 + (547,1018) 1 + (548,46) 2 + (548,697) 0 + (549,602) 2 + (550,927) 2 + (553,391) 1 + (554,351) 2 + (555,10) 2 + (556,26) 2 + (557,910) 0 + (560,792) 0 + (562,182) 0 + (562,862) 1 + (563,877) 0 + (564,310) 3 + (564,609) 3 + (565,490) 0 + (566,564) 2 + (566,607) 1 + (569,872) 0 + (570,465) 1 + (571,271) 3 + (571,919) 1 + (574,603) 0 + (576,256) 4 + (579,274) 0 + (580,182) 0 + (581,445) 0 + (582,177) 3 + (583,118) 0 + (584,399) 1 + (585,433) 4 + (587,254) 2 + (588,914) 2 + (589,1016) 3 + (590,95) 3 + (590,802) 2 + (592,527) 0 + (593,143) 2 + (594,430) 0 + (595,787) 2 + (598,788) 1 + (599,127) 3 + (601,478) 2 + 
(602,218) 0 + (603,759) 1 + (604,270) 1 + (605,76) 3 + (606,930) 0 + (608,832) 1 + (609,287) 1 + (610,794) 0 + (611,759) 1 + (613,398) 3 + (614,386) 4 + (615,115) 0 + (616,928) 0 + (617,30) 2 + (618,361) 5 + (619,996) 4 + (620,5) 3 + (621,41) 0 + (623,44) 2 + (624,19) 1 + (624,242) 2 + (624,524) 1 + (626,51) 0 + (627,361) 1 + (628,396) 3 + (629,882) 1 + (630,341) 1 + (631,49) 1 + (631,585) 1 + (632,73) 1 + (634,912) 2 + (635,882) 1 + (636,617) 1 + (637,716) 0 + (638,113) 1 + (639,616) 5 + (640,837) 2 + (641,457) 1 + (643,934) 3 + (647,783) 2 + (648,195) 1 + (649,614) 1 + (650,957) 1 + (651,281) 2 + (652,973) 1 + (653,60) 1 + (653,333) 2 + (654,605) 3 + (655,910) 0 + (656,349) 3 + (660,591) 4 + (661,512) 2 + (663,767) 0 + (665,77) 3 + (666,503) 4 + (667,951) 2 + (668,365) 4 + (669,300) 1 + (671,141) 1 + (671,565) 2 + (672,819) 1 + (674,819) 1 + (675,454) 0 + (676,242) 2 + (677,289) 4 + (678,802) 3 + (680,398) 1 + (681,390) 1 + (682,117) 4 + (683,110) 2 + (684,907) 0 + (686,202) 0 + (687,45) 1 + (688,287) 2 + (689,502) 3 + (690,299) 3 + (691,392) 2 + (692,600) 0 + (694,378) 1 + (695,702) 1 + (696,102) 2 + (698,631) 0 + (699,152) 1 + (700,840) 1 + (702,777) 1 + (703,132) 1 + (704,374) 1 + (705,579) 1 + (706,511) 3 + (707,76) 3 + (708,259) 2 + (708,925) 0 + (709,872) 1 + (709,873) 1 + (710,107) 3 + (710,293) 2 + (711,462) 0 + (714,475) 2 + (715,172) 0 + (715,751) 2 + (716,697) 0 + (717,234) 0 + (718,848) 2 + (719,331) 1 + (720,201) 1 + (720,725) 2 + (722,415) 2 + (722,934) 2 + (723,675) 2 + (724,480) 3 + (727,177) 4 + (728,797) 1 + (729,884) 1 + (730,767) 0 + (731,275) 1 + (732,910) 0 + (733,763) 5 + (734,574) 0 + (735,268) 3 + (736,115) 1 + (737,912) 2 + (738,1023) 2 + (739,335) 0 + (740,596) 3 + (741,365) 1 + (742,485) 5 + (743,186) 1 + (745,645) 2 + (746,273) 3 + (747,91) 5 + (748,886) 0 + (749,59) 2 + (749,755) 2 + (751,348) 0 + (752,313) 2 + (752,742) 0 + (752,745) 1 + (753,472) 1 + (753,592) 1 + (754,1007) 0 + (756,633) 1 + (758,847) 2 + (759,500) 3 + (760,340) 2 
+ (760,381) 2 + (762,962) 3 + (763,954) 0 + (764,392) 1 + (764,913) 3 + (766,915) 3 + (766,936) 0 + (767,372) 1 + (768,307) 0 + (770,458) 0 + (771,487) 0 + (773,56) 1 + (774,773) 0 + (775,115) 1 + (776,537) 1 + (777,392) 1 + (778,893) 0 + (779,644) 0 + (780,256) 2 + (782,399) 1 + (782,892) 2 + (783,614) 2 + (785,816) 1 + (786,462) 1 + (787,876) 1 + (788,273) 4 + (789,696) 2 + (790,471) 1 + (791,793) 3 + (792,636) 3 + (792,955) 3 + (793,809) 0 + (794,986) 1 + (795,656) 0 + (796,347) 3 + (797,880) 2 + (798,802) 0 + (801,130) 1 + (803,896) 3 + (804,1022) 3 + (805,32) 1 + (805,479) 1 + (806,889) 2 + (807,504) 3 + (809,719) 1 + (809,737) 2 + (810,646) 0 + (812,375) 3 + (813,200) 2 + (815,408) 3 + (816,902) 1 + (817,430) 1 + (818,985) 5 + (819,688) 1 + (821,839) 1 + (822,747) 1 + (823,39) 1 + (824,886) 0 + (825,406) 0 + (828,407) 2 + (829,511) 1 + (830,915) 2 + (831,982) 1 + (832,1003) 2 + (833,362) 2 + (833,999) 2 + (834,136) 2 + (834,295) 1 + (835,115) 1 + (836,218) 2 + (837,565) 4 + (839,541) 0 + (839,711) 0 + (840,159) 1 + (841,636) 1 + (842,136) 2 + (843,524) 0 + (844,114) 0 + (846,533) 1 + (847,741) 0 + (848,483) 1 + (849,464) 3 + (850,302) 0 + (851,567) 1 + (852,150) 4 + (852,529) 0 + (853,623) 1 + (855,106) 2 + (856,1014) 1 + (857,151) 2 + (857,650) 1 + (858,781) 1 + (858,994) 0 + (859,508) 0 + (859,716) 0 + (862,636) 2 + (863,21) 4 + (864,1022) 2 + (866,97) 0 + (867,48) 1 + (868,303) 1 + (869,364) 4 + (871,453) 1 + (873,173) 0 + (874,485) 7 + (875,168) 1 + (876,357) 0 + (877,722) 1 + (877,990) 0 + (880,176) 2 + (881,23) 1 + (882,608) 0 + (884,643) 3 + (885,687) 0 + (887,487) 0 + (888,110) 2 + (888,943) 0 + (889,892) 3 + (890,628) 2 + (891,679) 1 + (892,653) 2 + (894,33) 0 + (895,37) 2 + (895,695) 0 + (896,390) 0 + (897,42) 2 + (900,687) 0 + (901,605) 2 + (902,57) 1 + (903,1021) 1 + (904,808) 4 + (905,795) 3 + (906,479) 0 + (907,674) 2 + (909,456) 2 + (911,548) 1 + (914,924) 1 + (915,366) 2 + (915,502) 3 + (916,420) 3 + (916,823) 1 + (918,480) 3 + (920,608) 1 + 
(925,685) 0 + (926,755) 4 + (929,538) 0 + (930,13) 1 + (931,479) 3 + (933,860) 0 + (934,165) 0 + (935,351) 2 + (936,399) 1 + (938,215) 0 + (939,496) 0 + (940,414) 0 + (941,586) 5 + (942,356) 1 + (943,31) 4 + (943,538) 0 + (944,109) 3 + (945,671) 1 + (946,246) 3 + (947,182) 0 + (948,628) 2 + (949,316) 0 + (950,1017) 0 + (951,221) 2 + (955,457) 1 + (955,823) 0 + (956,653) 2 + (957,656) 0 + (958,644) 0 + (959,667) 2 + (960,78) 3 + (961,828) 4 + (962,877) 1 + (963,397) 1 + (964,370) 1 + (965,504) 3 + (966,483) 2 + (967,1023) 2 + (968,400) 0 + (969,564) 1 + (970,856) 1 + (971,875) 1 + (972,549) 1 + (974,934) 2 + (977,347) 3 + (978,123) 0 + (981,175) 3 + (983,58) 1 + (984,449) 1 + (984,582) 2 + (985,72) 1 + (985,743) 2 + (987,120) 2 + (987,340) 4 + (988,172) 0 + (989,585) 2 + (991,660) 1 + (992,531) 3 + (993,87) 2 + (993,674) 2 + (994,992) 2 + (995,170) 2 + (997,946) 1 + (998,678) 2 + (1001,877) 1 + (1002,286) 2 + (1004,250) 3 + (1006,1022) 3 + (1008,159) 1 + (1009,574) 0 + (1012,533) 1 + (1013,574) 1 + (1014,667) 3 + (1015,127) 1 + (1015,613) 2 + (1016,457) 1 + (1017,180) 2 + (1018,254) 2 + (1019,287) 3 + (1020,67) 3 + (1020,151) 2 + (1021,810) 1 + (1022,491) 0 + (1023,840) 2 + + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + C_actual, 893 entries, memory: 28.2 KB + + (0,478) 1 + (0,574) 2 + (5,560) 3 + (6,996) 2 + (7,183) 0 + (7,666) 0 + (8,896) 2 + (9,187) 0 + (10,446) 2 + (11,46) 2 + (11,955) 2 + (12,397) 1 + (12,953) 0 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 0 + (17,928) 0 + (19,821) 1 + (19,886) 0 + (20,474) 4 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 2 + (24,905) 0 + (25,241) 0 + (26,428) 0 + (28,107) 2 + (32,121) 0 + (33,81) 2 + (37,609) 2 + (39,698) 1 + (41,568) 1 + (42,324) 0 + (43,798) 1 + (46,208) 0 + (47,70) 1 + (48,336) 1 + (49,476) 1 + (50,35) 0 + (51,556) 0 + (52,999) 1 + (53,940) 1 + (54,558) 0 + (54,960) 1 + (55,979) 1 + (56,90) 2 + (57,846) 3 + (57,893) 0 + (58,35) 0 + (59,108) 3 + (60,479) 1 + (61,590) 2 + 
(62,771) 0 + (63,50) 0 + (64,268) 3 + (66,719) 2 + (67,411) 2 + (68,324) 0 + (69,477) 0 + (70,539) 1 + (71,228) 3 + (72,297) 3 + (73,665) 0 + (75,855) 0 + (76,248) 0 + (77,433) 4 + (78,90) 3 + (81,754) 4 + (82,243) 2 + (84,253) 1 + (86,104) 3 + (87,657) 0 + (89,825) 2 + (90,37) 4 + (91,234) 1 + (91,519) 1 + (92,74) 3 + (92,218) 1 + (92,690) 1 + (93,486) 2 + (94,637) 0 + (94,722) 1 + (96,564) 1 + (99,326) 2 + (100,281) 1 + (102,609) 2 + (104,644) 0 + (106,652) 1 + (107,239) 0 + (107,522) 2 + (108,131) 1 + (109,884) 2 + (110,402) 3 + (111,905) 2 + (112,127) 0 + (112,779) 0 + (113,278) 0 + (114,519) 1 + (115,240) 4 + (117,219) 0 + (117,338) 2 + (118,99) 4 + (120,477) 1 + (121,554) 3 + (121,715) 3 + (122,151) 3 + (125,177) 5 + (128,820) 6 + (129,660) 0 + (130,623) 1 + (131,253) 1 + (131,355) 1 + (133,492) 1 + (134,821) 0 + (135,295) 2 + (136,108) 3 + (137,834) 2 + (138,288) 1 + (139,284) 2 + (139,945) 0 + (141,199) 1 + (142,87) 4 + (142,225) 1 + (143,123) 0 + (144,574) 0 + (146,194) 3 + (148,357) 0 + (149,949) 1 + (150,717) 2 + (151,484) 2 + (156,290) 2 + (157,714) 0 + (157,974) 1 + (160,297) 1 + (162,601) 2 + (163,816) 3 + (164,221) 1 + (165,396) 1 + (166,801) 3 + (167,879) 3 + (168,321) 0 + (169,901) 3 + (172,951) 1 + (176,108) 1 + (176,188) 1 + (176,614) 2 + (176,781) 1 + (178,631) 1 + (179,932) 2 + (180,830) 3 + (182,675) 1 + (182,1001) 2 + (183,692) 1 + (184,143) 2 + (185,450) 1 + (186,779) 0 + (187,997) 3 + (188,357) 1 + (189,111) 2 + (190,990) 1 + (192,644) 0 + (192,953) 0 + (193,135) 1 + (194,137) 4 + (195,922) 4 + (197,859) 1 + (198,910) 1 + (199,531) 3 + (201,907) 0 + (202,863) 1 + (203,865) 4 + (204,614) 3 + (207,826) 1 + (208,985) 2 + (209,808) 3 + (211,71) 4 + (211,931) 3 + (212,426) 0 + (213,152) 0 + (214,928) 0 + (215,268) 3 + (216,550) 3 + (217,921) 0 + (218,704) 2 + (218,922) 2 + (219,66) 1 + (220,704) 2 + (221,56) 1 + (221,551) 2 + (222,545) 1 + (223,1016) 2 + (224,721) 1 + (225,935) 1 + (226,727) 0 + (228,743) 4 + (229,535) 2 + (231,551) 3 + 
(232,897) 2 + (234,520) 2 + (235,522) 2 + (236,221) 3 + (237,755) 2 + (238,964) 2 + (239,82) 0 + (240,388) 0 + (241,500) 2 + (242,124) 3 + (242,193) 0 + (243,300) 0 + (244,588) 0 + (244,1004) 3 + (245,494) 0 + (246,326) 1 + (247,115) 1 + (247,147) 1 + (248,233) 0 + (250,485) 6 + (251,708) 0 + (252,197) 1 + (253,485) 5 + (254,40) 3 + (254,238) 0 + (255,895) 3 + (256,114) 0 + (257,461) 2 + (257,796) 0 + (258,233) 1 + (260,884) 2 + (261,945) 1 + (262,368) 2 + (264,755) 1 + (265,124) 1 + (266,352) 3 + (267,10) 1 + (268,234) 1 + (269,400) 1 + (270,877) 0 + (270,924) 0 + (271,944) 0 + (272,67) 3 + (273,100) 1 + (274,979) 4 + (276,333) 2 + (277,377) 0 + (279,877) 1 + (280,18) 3 + (281,449) 3 + (282,179) 2 + (283,1007) 2 + (285,32) 1 + (286,37) 2 + (287,394) 3 + (288,848) 0 + (290,317) 0 + (291,594) 1 + (294,466) 2 + (294,960) 0 + (295,1) 0 + (295,106) 2 + (296,109) 2 + (296,183) 0 + (296,245) 0 + (297,912) 1 + (299,159) 1 + (300,554) 1 + (301,774) 1 + (302,30) 1 + (303,645) 1 + (304,229) 1 + (305,622) 0 + (307,264) 3 + (308,28) 0 + (309,328) 4 + (309,627) 0 + (310,357) 1 + (311,355) 1 + (312,61) 2 + (314,571) 3 + (315,177) 3 + (315,741) 0 + (316,177) 3 + (316,308) 4 + (320,468) 1 + (321,73) 0 + (322,235) 2 + (323,375) 3 + (323,651) 3 + (324,549) 2 + (325,306) 1 + (325,487) 1 + (326,649) 2 + (327,704) 0 + (329,176) 2 + (330,848) 1 + (330,965) 2 + (332,795) 1 + (334,695) 1 + (336,808) 4 + (337,608) 1 + (338,993) 2 + (339,680) 0 + (340,849) 1 + (342,723) 2 + (343,678) 2 + (344,384) 3 + (344,680) 0 + (345,75) 0 + (347,996) 3 + (348,60) 3 + (348,821) 1 + (350,282) 1 + (352,160) 2 + (353,536) 1 + (355,352) 5 + (356,340) 2 + (358,678) 2 + (360,679) 1 + (361,794) 0 + (361,989) 3 + (362,816) 2 + (363,206) 4 + (364,629) 0 + (365,990) 0 + (366,841) 1 + (366,971) 0 + (367,888) 2 + (368,587) 0 + (369,684) 3 + (370,270) 1 + (372,471) 1 + (373,88) 1 + (375,992) 2 + (376,336) 3 + (377,86) 1 + (378,882) 1 + (379,592) 2 + (380,77) 2 + (380,643) 2 + (381,1012) 2 + (382,816) 2 + (383,711) 2 
+ (385,670) 1 + (386,537) 1 + (387,347) 2 + (388,494) 1 + (389,328) 3 + (390,551) 1 + (391,59) 2 + (391,600) 1 + (394,692) 4 + (396,645) 2 + (398,107) 3 + (398,246) 2 + (399,436) 3 + (400,172) 0 + (401,790) 3 + (402,320) 2 + (403,40) 2 + (404,641) 0 + (405,49) 0 + (405,475) 1 + (407,320) 3 + (408,61) 4 + (410,754) 3 + (411,643) 2 + (412,949) 1 + (413,94) 5 + (415,26) 1 + (416,575) 0 + (417,366) 3 + (418,160) 0 + (419,209) 1 + (421,614) 1 + (422,177) 2 + (423,873) 1 + (424,542) 3 + (425,263) 0 + (426,377) 0 + (427,149) 0 + (429,305) 0 + (430,718) 1 + (431,51) 0 + (432,857) 2 + (434,604) 0 + (435,152) 2 + (436,356) 1 + (437,105) 3 + (440,338) 0 + (441,982) 2 + (442,880) 1 + (443,753) 1 + (446,741) 0 + (448,646) 0 + (448,744) 2 + (450,579) 1 + (451,147) 0 + (451,1017) 0 + (452,868) 3 + (453,26) 1 + (454,415) 1 + (454,668) 0 + (455,43) 0 + (456,849) 1 + (456,985) 2 + (457,218) 2 + (458,510) 4 + (459,737) 2 + (460,836) 2 + (461,849) 0 + (461,917) 2 + (462,900) 1 + (463,316) 1 + (464,762) 1 + (465,355) 1 + (465,801) 1 + (466,673) 0 + (468,288) 1 + (470,889) 2 + (471,650) 1 + (473,121) 1 + (473,127) 2 + (474,487) 0 + (476,44) 0 + (477,342) 1 + (480,667) 1 + (481,558) 0 + (482,680) 1 + (483,517) 1 + (484,961) 1 + (485,274) 0 + (486,1015) 3 + (487,194) 1 + (489,802) 2 + (490,811) 1 + (491,319) 4 + (492,377) 1 + (494,432) 1 + (495,809) 0 + (496,267) 2 + (496,902) 1 + (498,194) 1 + (500,84) 0 + (501,704) 2 + (503,519) 2 + (504,510) 3 + (505,574) 1 + (507,643) 3 + (508,449) 3 + (512,892) 3 + (513,271) 2 + (517,369) 1 + (518,293) 2 + (520,270) 1 + (521,1013) 1 + (522,284) 1 + (524,945) 1 + (525,94) 5 + (525,362) 2 + (526,52) 1 + (527,61) 3 + (529,998) 0 + (531,908) 1 + (533,674) 4 + (535,660) 1 + (535,776) 1 + (536,500) 3 + (537,799) 2 + (538,492) 2 + (538,861) 1 + (540,245) 0 + (542,137) 2 + (545,658) 0 + (546,213) 1 + (547,767) 1 + (547,912) 3 + (547,1018) 1 + (548,46) 2 + (548,697) 0 + (549,602) 2 + (550,927) 2 + (553,391) 1 + (554,351) 2 + (555,10) 2 + (556,26) 2 + 
(557,910) 0 + (560,792) 0 + (562,182) 0 + (562,862) 1 + (563,877) 0 + (564,310) 3 + (564,609) 3 + (565,490) 0 + (566,564) 2 + (566,607) 1 + (569,872) 0 + (570,465) 1 + (571,271) 3 + (571,919) 1 + (574,603) 0 + (576,256) 4 + (579,274) 0 + (580,182) 0 + (581,445) 0 + (582,177) 3 + (583,118) 0 + (584,399) 1 + (585,433) 4 + (587,254) 2 + (588,914) 2 + (589,1016) 3 + (590,95) 3 + (590,802) 2 + (592,527) 0 + (593,143) 2 + (594,430) 0 + (595,787) 2 + (598,788) 1 + (599,127) 3 + (601,478) 2 + (602,218) 0 + (603,759) 1 + (604,270) 1 + (605,76) 3 + (606,930) 0 + (608,832) 1 + (609,287) 1 + (610,794) 0 + (611,759) 1 + (613,398) 3 + (614,386) 4 + (615,115) 0 + (616,928) 0 + (617,30) 2 + (618,361) 5 + (619,996) 4 + (620,5) 3 + (621,41) 0 + (623,44) 2 + (624,19) 1 + (624,242) 2 + (624,524) 1 + (626,51) 0 + (627,361) 1 + (628,396) 3 + (629,882) 1 + (630,341) 1 + (631,49) 1 + (631,585) 1 + (632,73) 1 + (634,912) 2 + (635,882) 1 + (636,617) 1 + (637,716) 0 + (638,113) 1 + (639,616) 5 + (640,837) 2 + (641,457) 1 + (643,934) 3 + (647,783) 2 + (648,195) 1 + (649,614) 1 + (650,957) 1 + (651,281) 2 + (652,973) 1 + (653,60) 1 + (653,333) 2 + (654,605) 3 + (655,910) 0 + (656,349) 3 + (660,591) 4 + (661,512) 2 + (663,767) 0 + (665,77) 3 + (666,503) 4 + (667,951) 2 + (668,365) 4 + (669,300) 1 + (671,141) 1 + (671,565) 2 + (672,819) 1 + (674,819) 1 + (675,454) 0 + (676,242) 2 + (677,289) 4 + (678,802) 3 + (680,398) 1 + (681,390) 1 + (682,117) 4 + (683,110) 2 + (684,907) 0 + (686,202) 0 + (687,45) 1 + (688,287) 2 + (689,502) 3 + (690,299) 3 + (691,392) 2 + (692,600) 0 + (694,378) 1 + (695,702) 1 + (696,102) 2 + (698,631) 0 + (699,152) 1 + (700,840) 1 + (702,777) 1 + (703,132) 1 + (704,374) 1 + (705,579) 1 + (706,511) 3 + (707,76) 3 + (708,259) 2 + (708,925) 0 + (709,872) 1 + (709,873) 1 + (710,107) 3 + (710,293) 2 + (711,462) 0 + (714,475) 2 + (715,172) 0 + (715,751) 2 + (716,697) 0 + (717,234) 0 + (718,848) 2 + (719,331) 1 + (720,201) 1 + (720,725) 2 + (722,415) 2 + (722,934) 2 + (723,675) 2 
+ (724,480) 3 + (727,177) 4 + (728,797) 1 + (729,884) 1 + (730,767) 0 + (731,275) 1 + (732,910) 0 + (733,763) 5 + (734,574) 0 + (735,268) 3 + (736,115) 1 + (737,912) 2 + (738,1023) 2 + (739,335) 0 + (740,596) 3 + (741,365) 1 + (742,485) 5 + (743,186) 1 + (745,645) 2 + (746,273) 3 + (747,91) 5 + (748,886) 0 + (749,59) 2 + (749,755) 2 + (751,348) 0 + (752,313) 2 + (752,742) 0 + (752,745) 1 + (753,472) 1 + (753,592) 1 + (754,1007) 0 + (756,633) 1 + (758,847) 2 + (759,500) 3 + (760,340) 2 + (760,381) 2 + (762,962) 3 + (763,954) 0 + (764,392) 1 + (764,913) 3 + (766,915) 3 + (766,936) 0 + (767,372) 1 + (768,307) 0 + (770,458) 0 + (771,487) 0 + (773,56) 1 + (774,773) 0 + (775,115) 1 + (776,537) 1 + (777,392) 1 + (778,893) 0 + (779,644) 0 + (780,256) 2 + (782,399) 1 + (782,892) 2 + (783,614) 2 + (785,816) 1 + (786,462) 1 + (787,876) 1 + (788,273) 4 + (789,696) 2 + (790,471) 1 + (791,793) 3 + (792,636) 3 + (792,955) 3 + (793,809) 0 + (794,986) 1 + (795,656) 0 + (796,347) 3 + (797,880) 2 + (798,802) 0 + (801,130) 1 + (803,896) 3 + (804,1022) 3 + (805,32) 1 + (805,479) 1 + (806,889) 2 + (807,504) 3 + (809,719) 1 + (809,737) 2 + (810,646) 0 + (812,375) 3 + (813,200) 2 + (815,408) 3 + (816,902) 1 + (817,430) 1 + (818,985) 5 + (819,688) 1 + (821,839) 1 + (822,747) 1 + (823,39) 1 + (824,886) 0 + (825,406) 0 + (828,407) 2 + (829,511) 1 + (830,915) 2 + (831,982) 1 + (832,1003) 2 + (833,362) 2 + (833,999) 2 + (834,136) 2 + (834,295) 1 + (835,115) 1 + (836,218) 2 + (837,565) 4 + (839,541) 0 + (839,711) 0 + (840,159) 1 + (841,636) 1 + (842,136) 2 + (843,524) 0 + (844,114) 0 + (846,533) 1 + (847,741) 0 + (848,483) 1 + (849,464) 3 + (850,302) 0 + (851,567) 1 + (852,150) 4 + (852,529) 0 + (853,623) 1 + (855,106) 2 + (856,1014) 1 + (857,151) 2 + (857,650) 1 + (858,781) 1 + (858,994) 0 + (859,508) 0 + (859,716) 0 + (862,636) 2 + (863,21) 4 + (864,1022) 2 + (866,97) 0 + (867,48) 1 + (868,303) 1 + (869,364) 4 + (871,453) 1 + (873,173) 0 + (874,485) 7 + (875,168) 1 + (876,357) 0 + (877,722) 1 
+ (877,990) 0 + (880,176) 2 + (881,23) 1 + (882,608) 0 + (884,643) 3 + (885,687) 0 + (887,487) 0 + (888,110) 2 + (888,943) 0 + (889,892) 3 + (890,628) 2 + (891,679) 1 + (892,653) 2 + (894,33) 0 + (895,37) 2 + (895,695) 0 + (896,390) 0 + (897,42) 2 + (900,687) 0 + (901,605) 2 + (902,57) 1 + (903,1021) 1 + (904,808) 4 + (905,795) 3 + (906,479) 0 + (907,674) 2 + (909,456) 2 + (911,548) 1 + (914,924) 1 + (915,366) 2 + (915,502) 3 + (916,420) 3 + (916,823) 1 + (918,480) 3 + (920,608) 1 + (925,685) 0 + (926,755) 4 + (929,538) 0 + (930,13) 1 + (931,479) 3 + (933,860) 0 + (934,165) 0 + (935,351) 2 + (936,399) 1 + (938,215) 0 + (939,496) 0 + (940,414) 0 + (941,586) 5 + (942,356) 1 + (943,31) 4 + (943,538) 0 + (944,109) 3 + (945,671) 1 + (946,246) 3 + (947,182) 0 + (948,628) 2 + (949,316) 0 + (950,1017) 0 + (951,221) 2 + (955,457) 1 + (955,823) 0 + (956,653) 2 + (957,656) 0 + (958,644) 0 + (959,667) 2 + (960,78) 3 + (961,828) 4 + (962,877) 1 + (963,397) 1 + (964,370) 1 + (965,504) 3 + (966,483) 2 + (967,1023) 2 + (968,400) 0 + (969,564) 1 + (970,856) 1 + (971,875) 1 + (972,549) 1 + (974,934) 2 + (977,347) 3 + (978,123) 0 + (981,175) 3 + (983,58) 1 + (984,449) 1 + (984,582) 2 + (985,72) 1 + (985,743) 2 + (987,120) 2 + (987,340) 4 + (988,172) 0 + (989,585) 2 + (991,660) 1 + (992,531) 3 + (993,87) 2 + (993,674) 2 + (994,992) 2 + (995,170) 2 + (997,946) 1 + (998,678) 2 + (1001,877) 1 + (1002,286) 2 + (1004,250) 3 + (1006,1022) 3 + (1008,159) 1 + (1009,574) 0 + (1012,533) 1 + (1013,574) 1 + (1014,667) 3 + (1015,127) 1 + (1015,613) 2 + (1016,457) 1 + (1017,180) 2 + (1018,254) 2 + (1019,287) 3 + (1020,67) 3 + (1020,151) 2 + (1021,810) 1 + (1022,491) 0 + (1023,840) 2 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, sparse by row + Diff actual, 893 entries, memory: 32.2 KB + + (0,478) 0 + (0,574) 0 + (5,560) 0 + (6,996) 0 + (7,183) 0 + (7,666) 0 + (8,896) 0 + (9,187) 0 + (10,446) 0 + (11,46) 0 + (11,955) 0 + (12,397) 0 + (12,953) 0 + 
(13,192) 0 + (14,421) 0 + (15,568) 0 + (16,788) 0 + (16,904) 0 + (17,928) 0 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 0 + (21,975) 0 + (22,569) 0 + (23,310) 0 + (24,905) 0 + (25,241) 0 + (26,428) 0 + (28,107) 0 + (32,121) 0 + (33,81) 0 + (37,609) 0 + (39,698) 0 + (41,568) 0 + (42,324) 0 + (43,798) 0 + (46,208) 0 + (47,70) 0 + (48,336) 0 + (49,476) 0 + (50,35) 0 + (51,556) 0 + (52,999) 0 + (53,940) 0 + (54,558) 0 + (54,960) 0 + (55,979) 0 + (56,90) 0 + (57,846) 0 + (57,893) 0 + (58,35) 0 + (59,108) 0 + (60,479) 0 + (61,590) 0 + (62,771) 0 + (63,50) 0 + (64,268) 0 + (66,719) 0 + (67,411) 0 + (68,324) 0 + (69,477) 0 + (70,539) 0 + (71,228) 0 + (72,297) 0 + (73,665) 0 + (75,855) 0 + (76,248) 0 + (77,433) 0 + (78,90) 0 + (81,754) 0 + (82,243) 0 + (84,253) 0 + (86,104) 0 + (87,657) 0 + (89,825) 0 + (90,37) 0 + (91,234) 0 + (91,519) 0 + (92,74) 0 + (92,218) 0 + (92,690) 0 + (93,486) 0 + (94,637) 0 + (94,722) 0 + (96,564) 0 + (99,326) 0 + (100,281) 0 + (102,609) 0 + (104,644) 0 + (106,652) 0 + (107,239) 0 + (107,522) 0 + (108,131) 0 + (109,884) 0 + (110,402) 0 + (111,905) 0 + (112,127) 0 + (112,779) 0 + (113,278) 0 + (114,519) 0 + (115,240) 0 + (117,219) 0 + (117,338) 0 + (118,99) 0 + (120,477) 0 + (121,554) 0 + (121,715) 0 + (122,151) 0 + (125,177) 0 + (128,820) 0 + (129,660) 0 + (130,623) 0 + (131,253) 0 + (131,355) 0 + (133,492) 0 + (134,821) 0 + (135,295) 0 + (136,108) 0 + (137,834) 0 + (138,288) 0 + (139,284) 0 + (139,945) 0 + (141,199) 0 + (142,87) 0 + (142,225) 0 + (143,123) 0 + (144,574) 0 + (146,194) 0 + (148,357) 0 + (149,949) 0 + (150,717) 0 + (151,484) 0 + (156,290) 0 + (157,714) 0 + (157,974) 0 + (160,297) 0 + (162,601) 0 + (163,816) 0 + (164,221) 0 + (165,396) 0 + (166,801) 0 + (167,879) 0 + (168,321) 0 + (169,901) 0 + (172,951) 0 + (176,108) 0 + (176,188) 0 + (176,614) 0 + (176,781) 0 + (178,631) 0 + (179,932) 0 + (180,830) 0 + (182,675) 0 + (182,1001) 0 + (183,692) 0 + (184,143) 0 + (185,450) 0 + (186,779) 0 + (187,997) 0 + (188,357) 0 + (189,111) 0 
+ (190,990) 0 + (192,644) 0 + (192,953) 0 + (193,135) 0 + (194,137) 0 + (195,922) 0 + (197,859) 0 + (198,910) 0 + (199,531) 0 + (201,907) 0 + (202,863) 0 + (203,865) 0 + (204,614) 0 + (207,826) 0 + (208,985) 0 + (209,808) 0 + (211,71) 0 + (211,931) 0 + (212,426) 0 + (213,152) 0 + (214,928) 0 + (215,268) 0 + (216,550) 0 + (217,921) 0 + (218,704) 0 + (218,922) 0 + (219,66) 0 + (220,704) 0 + (221,56) 0 + (221,551) 0 + (222,545) 0 + (223,1016) 0 + (224,721) 0 + (225,935) 0 + (226,727) 0 + (228,743) 0 + (229,535) 0 + (231,551) 0 + (232,897) 0 + (234,520) 0 + (235,522) 0 + (236,221) 0 + (237,755) 0 + (238,964) 0 + (239,82) 0 + (240,388) 0 + (241,500) 0 + (242,124) 0 + (242,193) 0 + (243,300) 0 + (244,588) 0 + (244,1004) 0 + (245,494) 0 + (246,326) 0 + (247,115) 0 + (247,147) 0 + (248,233) 0 + (250,485) 0 + (251,708) 0 + (252,197) 0 + (253,485) 0 + (254,40) 0 + (254,238) 0 + (255,895) 0 + (256,114) 0 + (257,461) 0 + (257,796) 0 + (258,233) 0 + (260,884) 0 + (261,945) 0 + (262,368) 0 + (264,755) 0 + (265,124) 0 + (266,352) 0 + (267,10) 0 + (268,234) 0 + (269,400) 0 + (270,877) 0 + (270,924) 0 + (271,944) 0 + (272,67) 0 + (273,100) 0 + (274,979) 0 + (276,333) 0 + (277,377) 0 + (279,877) 0 + (280,18) 0 + (281,449) 0 + (282,179) 0 + (283,1007) 0 + (285,32) 0 + (286,37) 0 + (287,394) 0 + (288,848) 0 + (290,317) 0 + (291,594) 0 + (294,466) 0 + (294,960) 0 + (295,1) 0 + (295,106) 0 + (296,109) 0 + (296,183) 0 + (296,245) 0 + (297,912) 0 + (299,159) 0 + (300,554) 0 + (301,774) 0 + (302,30) 0 + (303,645) 0 + (304,229) 0 + (305,622) 0 + (307,264) 0 + (308,28) 0 + (309,328) 0 + (309,627) 0 + (310,357) 0 + (311,355) 0 + (312,61) 0 + (314,571) 0 + (315,177) 0 + (315,741) 0 + (316,177) 0 + (316,308) 0 + (320,468) 0 + (321,73) 0 + (322,235) 0 + (323,375) 0 + (323,651) 0 + (324,549) 0 + (325,306) 0 + (325,487) 0 + (326,649) 0 + (327,704) 0 + (329,176) 0 + (330,848) 0 + (330,965) 0 + (332,795) 0 + (334,695) 0 + (336,808) 0 + (337,608) 0 + (338,993) 0 + (339,680) 0 + (340,849) 0 + 
(342,723) 0 + (343,678) 0 + (344,384) 0 + (344,680) 0 + (345,75) 0 + (347,996) 0 + (348,60) 0 + (348,821) 0 + (350,282) 0 + (352,160) 0 + (353,536) 0 + (355,352) 0 + (356,340) 0 + (358,678) 0 + (360,679) 0 + (361,794) 0 + (361,989) 0 + (362,816) 0 + (363,206) 0 + (364,629) 0 + (365,990) 0 + (366,841) 0 + (366,971) 0 + (367,888) 0 + (368,587) 0 + (369,684) 0 + (370,270) 0 + (372,471) 0 + (373,88) 0 + (375,992) 0 + (376,336) 0 + (377,86) 0 + (378,882) 0 + (379,592) 0 + (380,77) 0 + (380,643) 0 + (381,1012) 0 + (382,816) 0 + (383,711) 0 + (385,670) 0 + (386,537) 0 + (387,347) 0 + (388,494) 0 + (389,328) 0 + (390,551) 0 + (391,59) 0 + (391,600) 0 + (394,692) 0 + (396,645) 0 + (398,107) 0 + (398,246) 0 + (399,436) 0 + (400,172) 0 + (401,790) 0 + (402,320) 0 + (403,40) 0 + (404,641) 0 + (405,49) 0 + (405,475) 0 + (407,320) 0 + (408,61) 0 + (410,754) 0 + (411,643) 0 + (412,949) 0 + (413,94) 0 + (415,26) 0 + (416,575) 0 + (417,366) 0 + (418,160) 0 + (419,209) 0 + (421,614) 0 + (422,177) 0 + (423,873) 0 + (424,542) 0 + (425,263) 0 + (426,377) 0 + (427,149) 0 + (429,305) 0 + (430,718) 0 + (431,51) 0 + (432,857) 0 + (434,604) 0 + (435,152) 0 + (436,356) 0 + (437,105) 0 + (440,338) 0 + (441,982) 0 + (442,880) 0 + (443,753) 0 + (446,741) 0 + (448,646) 0 + (448,744) 0 + (450,579) 0 + (451,147) 0 + (451,1017) 0 + (452,868) 0 + (453,26) 0 + (454,415) 0 + (454,668) 0 + (455,43) 0 + (456,849) 0 + (456,985) 0 + (457,218) 0 + (458,510) 0 + (459,737) 0 + (460,836) 0 + (461,849) 0 + (461,917) 0 + (462,900) 0 + (463,316) 0 + (464,762) 0 + (465,355) 0 + (465,801) 0 + (466,673) 0 + (468,288) 0 + (470,889) 0 + (471,650) 0 + (473,121) 0 + (473,127) 0 + (474,487) 0 + (476,44) 0 + (477,342) 0 + (480,667) 0 + (481,558) 0 + (482,680) 0 + (483,517) 0 + (484,961) 0 + (485,274) 0 + (486,1015) 0 + (487,194) 0 + (489,802) 0 + (490,811) 0 + (491,319) 0 + (492,377) 0 + (494,432) 0 + (495,809) 0 + (496,267) 0 + (496,902) 0 + (498,194) 0 + (500,84) 0 + (501,704) 0 + (503,519) 0 + (504,510) 0 + (505,574) 
0 + (507,643) 0 + (508,449) 0 + (512,892) 0 + (513,271) 0 + (517,369) 0 + (518,293) 0 + (520,270) 0 + (521,1013) 0 + (522,284) 0 + (524,945) 0 + (525,94) 0 + (525,362) 0 + (526,52) 0 + (527,61) 0 + (529,998) 0 + (531,908) 0 + (533,674) 0 + (535,660) 0 + (535,776) 0 + (536,500) 0 + (537,799) 0 + (538,492) 0 + (538,861) 0 + (540,245) 0 + (542,137) 0 + (545,658) 0 + (546,213) 0 + (547,767) 0 + (547,912) 0 + (547,1018) 0 + (548,46) 0 + (548,697) 0 + (549,602) 0 + (550,927) 0 + (553,391) 0 + (554,351) 0 + (555,10) 0 + (556,26) 0 + (557,910) 0 + (560,792) 0 + (562,182) 0 + (562,862) 0 + (563,877) 0 + (564,310) 0 + (564,609) 0 + (565,490) 0 + (566,564) 0 + (566,607) 0 + (569,872) 0 + (570,465) 0 + (571,271) 0 + (571,919) 0 + (574,603) 0 + (576,256) 0 + (579,274) 0 + (580,182) 0 + (581,445) 0 + (582,177) 0 + (583,118) 0 + (584,399) 0 + (585,433) 0 + (587,254) 0 + (588,914) 0 + (589,1016) 0 + (590,95) 0 + (590,802) 0 + (592,527) 0 + (593,143) 0 + (594,430) 0 + (595,787) 0 + (598,788) 0 + (599,127) 0 + (601,478) 0 + (602,218) 0 + (603,759) 0 + (604,270) 0 + (605,76) 0 + (606,930) 0 + (608,832) 0 + (609,287) 0 + (610,794) 0 + (611,759) 0 + (613,398) 0 + (614,386) 0 + (615,115) 0 + (616,928) 0 + (617,30) 0 + (618,361) 0 + (619,996) 0 + (620,5) 0 + (621,41) 0 + (623,44) 0 + (624,19) 0 + (624,242) 0 + (624,524) 0 + (626,51) 0 + (627,361) 0 + (628,396) 0 + (629,882) 0 + (630,341) 0 + (631,49) 0 + (631,585) 0 + (632,73) 0 + (634,912) 0 + (635,882) 0 + (636,617) 0 + (637,716) 0 + (638,113) 0 + (639,616) 0 + (640,837) 0 + (641,457) 0 + (643,934) 0 + (647,783) 0 + (648,195) 0 + (649,614) 0 + (650,957) 0 + (651,281) 0 + (652,973) 0 + (653,60) 0 + (653,333) 0 + (654,605) 0 + (655,910) 0 + (656,349) 0 + (660,591) 0 + (661,512) 0 + (663,767) 0 + (665,77) 0 + (666,503) 0 + (667,951) 0 + (668,365) 0 + (669,300) 0 + (671,141) 0 + (671,565) 0 + (672,819) 0 + (674,819) 0 + (675,454) 0 + (676,242) 0 + (677,289) 0 + (678,802) 0 + (680,398) 0 + (681,390) 0 + (682,117) 0 + (683,110) 0 + (684,907) 
0 + (686,202) 0 + (687,45) 0 + (688,287) 0 + (689,502) 0 + (690,299) 0 + (691,392) 0 + (692,600) 0 + (694,378) 0 + (695,702) 0 + (696,102) 0 + (698,631) 0 + (699,152) 0 + (700,840) 0 + (702,777) 0 + (703,132) 0 + (704,374) 0 + (705,579) 0 + (706,511) 0 + (707,76) 0 + (708,259) 0 + (708,925) 0 + (709,872) 0 + (709,873) 0 + (710,107) 0 + (710,293) 0 + (711,462) 0 + (714,475) 0 + (715,172) 0 + (715,751) 0 + (716,697) 0 + (717,234) 0 + (718,848) 0 + (719,331) 0 + (720,201) 0 + (720,725) 0 + (722,415) 0 + (722,934) 0 + (723,675) 0 + (724,480) 0 + (727,177) 0 + (728,797) 0 + (729,884) 0 + (730,767) 0 + (731,275) 0 + (732,910) 0 + (733,763) 0 + (734,574) 0 + (735,268) 0 + (736,115) 0 + (737,912) 0 + (738,1023) 0 + (739,335) 0 + (740,596) 0 + (741,365) 0 + (742,485) 0 + (743,186) 0 + (745,645) 0 + (746,273) 0 + (747,91) 0 + (748,886) 0 + (749,59) 0 + (749,755) 0 + (751,348) 0 + (752,313) 0 + (752,742) 0 + (752,745) 0 + (753,472) 0 + (753,592) 0 + (754,1007) 0 + (756,633) 0 + (758,847) 0 + (759,500) 0 + (760,340) 0 + (760,381) 0 + (762,962) 0 + (763,954) 0 + (764,392) 0 + (764,913) 0 + (766,915) 0 + (766,936) 0 + (767,372) 0 + (768,307) 0 + (770,458) 0 + (771,487) 0 + (773,56) 0 + (774,773) 0 + (775,115) 0 + (776,537) 0 + (777,392) 0 + (778,893) 0 + (779,644) 0 + (780,256) 0 + (782,399) 0 + (782,892) 0 + (783,614) 0 + (785,816) 0 + (786,462) 0 + (787,876) 0 + (788,273) 0 + (789,696) 0 + (790,471) 0 + (791,793) 0 + (792,636) 0 + (792,955) 0 + (793,809) 0 + (794,986) 0 + (795,656) 0 + (796,347) 0 + (797,880) 0 + (798,802) 0 + (801,130) 0 + (803,896) 0 + (804,1022) 0 + (805,32) 0 + (805,479) 0 + (806,889) 0 + (807,504) 0 + (809,719) 0 + (809,737) 0 + (810,646) 0 + (812,375) 0 + (813,200) 0 + (815,408) 0 + (816,902) 0 + (817,430) 0 + (818,985) 0 + (819,688) 0 + (821,839) 0 + (822,747) 0 + (823,39) 0 + (824,886) 0 + (825,406) 0 + (828,407) 0 + (829,511) 0 + (830,915) 0 + (831,982) 0 + (832,1003) 0 + (833,362) 0 + (833,999) 0 + (834,136) 0 + (834,295) 0 + (835,115) 0 + (836,218) 
0 + (837,565) 0 + (839,541) 0 + (839,711) 0 + (840,159) 0 + (841,636) 0 + (842,136) 0 + (843,524) 0 + (844,114) 0 + (846,533) 0 + (847,741) 0 + (848,483) 0 + (849,464) 0 + (850,302) 0 + (851,567) 0 + (852,150) 0 + (852,529) 0 + (853,623) 0 + (855,106) 0 + (856,1014) 0 + (857,151) 0 + (857,650) 0 + (858,781) 0 + (858,994) 0 + (859,508) 0 + (859,716) 0 + (862,636) 0 + (863,21) 0 + (864,1022) 0 + (866,97) 0 + (867,48) 0 + (868,303) 0 + (869,364) 0 + (871,453) 0 + (873,173) 0 + (874,485) 0 + (875,168) 0 + (876,357) 0 + (877,722) 0 + (877,990) 0 + (880,176) 0 + (881,23) 0 + (882,608) 0 + (884,643) 0 + (885,687) 0 + (887,487) 0 + (888,110) 0 + (888,943) 0 + (889,892) 0 + (890,628) 0 + (891,679) 0 + (892,653) 0 + (894,33) 0 + (895,37) 0 + (895,695) 0 + (896,390) 0 + (897,42) 0 + (900,687) 0 + (901,605) 0 + (902,57) 0 + (903,1021) 0 + (904,808) 0 + (905,795) 0 + (906,479) 0 + (907,674) 0 + (909,456) 0 + (911,548) 0 + (914,924) 0 + (915,366) 0 + (915,502) 0 + (916,420) 0 + (916,823) 0 + (918,480) 0 + (920,608) 0 + (925,685) 0 + (926,755) 0 + (929,538) 0 + (930,13) 0 + (931,479) 0 + (933,860) 0 + (934,165) 0 + (935,351) 0 + (936,399) 0 + (938,215) 0 + (939,496) 0 + (940,414) 0 + (941,586) 0 + (942,356) 0 + (943,31) 0 + (943,538) 0 + (944,109) 0 + (945,671) 0 + (946,246) 0 + (947,182) 0 + (948,628) 0 + (949,316) 0 + (950,1017) 0 + (951,221) 0 + (955,457) 0 + (955,823) 0 + (956,653) 0 + (957,656) 0 + (958,644) 0 + (959,667) 0 + (960,78) 0 + (961,828) 0 + (962,877) 0 + (963,397) 0 + (964,370) 0 + (965,504) 0 + (966,483) 0 + (967,1023) 0 + (968,400) 0 + (969,564) 0 + (970,856) 0 + (971,875) 0 + (972,549) 0 + (974,934) 0 + (977,347) 0 + (978,123) 0 + (981,175) 0 + (983,58) 0 + (984,449) 0 + (984,582) 0 + (985,72) 0 + (985,743) 0 + (987,120) 0 + (987,340) 0 + (988,172) 0 + (989,585) 0 + (991,660) 0 + (992,531) 0 + (993,87) 0 + (993,674) 0 + (994,992) 0 + (995,170) 0 + (997,946) 0 + (998,678) 0 + (1001,877) 0 + (1002,286) 0 + (1004,250) 0 + (1006,1022) 0 + (1008,159) 0 + (1009,574) 
0 + (1012,533) 0 + (1013,574) 0 + (1014,667) 0 + (1015,127) 0 + (1015,613) 0 + (1016,457) 0 + (1017,180) 0 + (1018,254) 0 + (1019,287) 0 + (1020,67) 0 + (1020,151) 0 + (1021,810) 0 + (1022,491) 0 + (1023,840) 0 + + + 1024x1024 GraphBLAS bool matrix, sparse by row + T actual, 893 entries, memory: 25.2 KB + + (0,478) 1 + (0,574) 1 + (5,560) 1 + (6,996) 1 + (7,183) 1 + (7,666) 1 + (8,896) 1 + (9,187) 1 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 1 + (12,953) 1 + (13,192) 1 + (14,421) 1 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 1 + (19,821) 1 + (19,886) 1 + (20,474) 1 + (21,479) 1 + (21,975) 1 + (22,569) 1 + (23,310) 1 + (24,905) 1 + (25,241) 1 + (26,428) 1 + ... + work:893 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. + rmm_wrap_alloc 8192 bytes +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 4096 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 10240 values, invsparse = 103 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +10240 nonzeroes left to fill.. 
+4633 nonzeroes left to fill.. + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes +1024 slots to fill +all pairs to bucket 6, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5416f00 +inside enumify: 0x7f1ff5416f00 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 6, no filling +done assigning buckets +bucket 6 has 1024 dots to do +LAUNCHING BUCKET CODE: 6 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h 
+opening /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vssp +found memory-cached prog GB_jit_AxB_dot3_phase3_vssp + got kernel instance AxB_dot3_phase3_vssp_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vssp_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsspIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<32,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 1.00352ms + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1001 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 28.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 1 + (5,560) 0 + (6,996) 0 + (7,183) 1 + (7,666) 1 + (8,896) 0 + (9,187) 0 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 0 + (12,953) 1 + (13,192) 1 + (14,421) 0 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 0 + (18,103) 1 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 1 + (21,975) 0 + (22,569) 1 + (23,310) 0 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 23 entries, memory: 16.6 KB + + (42,324) 0 + (73,665) 1 + (106,652) 0 + (138,288) 1 + (242,124) 1 + (295,1) 0 + (300,554) 1 + (312,61) 0 + (344,384) 0 + (496,267) 0 + (587,254) 1 + (686,202) 0 + (708,925) 1 + (715,751) 0 + (729,884) 0 + (741,365) 1 + (751,348) 1 + (792,636) 0 + (857,151) 0 + (876,357) 0 + (940,414) 0 + (945,671) 0 + (968,400) 1 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 23 entries, memory: 1.1 KB + + (42,324) 0 + (73,665) 1 + (106,652) 0 + (138,288) 1 + (242,124) 1 + (295,1) 0 + (300,554) 1 + (312,61) 0 + (344,384) 0 + (496,267) 0 + (587,254) 1 + (686,202) 0 + (708,925) 1 + (715,751) 0 + (729,884) 0 + (741,365) 1 + (751,348) 1 + (792,636) 0 + (857,151) 0 + (876,357) 0 + (940,414) 0 + (945,671) 0 + (968,400) 1 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 23 entries, memory: 1.2 KB + + (42,324) 0 + 
(73,665) 0 + (106,652) 0 + (138,288) 0 + (242,124) 0 + (295,1) 0 + (300,554) 0 + (312,61) 0 + (344,384) 0 + (496,267) 0 + (587,254) 0 + (686,202) 0 + (708,925) 0 + (715,751) 0 + (729,884) 0 + (741,365) 0 + (751,348) 0 + (792,636) 0 + (857,151) 0 + (876,357) 0 + (940,414) 0 + (945,671) 0 + (968,400) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 23 entries, memory: 1.0 KB + + (42,324) 1 + (73,665) 1 + (106,652) 1 + (138,288) 1 + (242,124) 1 + (295,1) 1 + (300,554) 1 + (312,61) 1 + (344,384) 1 + (496,267) 1 + (587,254) 1 + (686,202) 1 + (708,925) 1 + (715,751) 1 + (729,884) 1 + (741,365) 1 + (751,348) 1 + (792,636) 1 + (857,151) 1 + (876,357) 1 + (940,414) 1 + (945,671) 1 + (968,400) 1 + work:23 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 16384 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 4096 values, invsparse = 256 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +4096 nonzeroes left to fill.. +1491 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes +1024 slots to fill +all pairs to bucket 7, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff542a200 +inside enumify: 0x7f1ff542a200 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 7, no filling +done assigning buckets +bucket 7 has 1024 dots to do +LAUNCHING BUCKET CODE: 7 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<2,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.776192ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1010 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 28.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 1 + (5,560) 0 + (6,996) 0 + (7,183) 1 + (7,666) 1 + (8,896) 0 + (9,187) 0 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 0 + (12,953) 1 + (13,192) 1 + (14,421) 0 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 0 + (18,103) 1 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 1 + (21,975) 0 + (22,569) 1 + (23,310) 0 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 14 entries, memory: 16.4 KB + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 14 entries, memory: 704 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 14 entries, memory: 768 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 0 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 0 + (891,679) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 14 entries, memory: 656 bytes + + (99,326) 1 + (115,240) 1 + (176,614) 1 + (180,830) 1 + (343,678) 1 + 
(398,246) 1 + (411,643) 1 + (557,910) 1 + (590,95) 1 + (601,478) 1 + (623,44) 1 + (729,884) 1 + (825,406) 1 + (891,679) 1 + work:14 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 4096 values, invsparse = 256 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +4096 nonzeroes left to fill.. +1491 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes +1024 slots to fill +all pairs to bucket 8, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5447700 +inside enumify: 0x7f1ff5447700 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 8, no filling +done assigning buckets +bucket 8 has 1024 dots to do +LAUNCHING BUCKET CODE: 8 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<2,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.867296ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1010 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 28.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 1 + (5,560) 0 + (6,996) 0 + (7,183) 1 + (7,666) 1 + (8,896) 0 + (9,187) 0 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 0 + (12,953) 1 + (13,192) 1 + (14,421) 0 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 0 + (18,103) 1 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 1 + (21,975) 0 + (22,569) 1 + (23,310) 0 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 14 entries, memory: 16.4 KB + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 14 entries, memory: 704 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 14 entries, memory: 768 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 0 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 0 + (891,679) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 14 entries, memory: 656 bytes + + (99,326) 1 + (115,240) 1 + (176,614) 1 + (180,830) 1 + (343,678) 1 + (398,246) 1 + (411,643) 1 + (557,910) 1 + (590,95) 1 + 
(601,478) 1 + (623,44) 1 + (729,884) 1 + (825,406) 1 + (891,679) 1 + work:14 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 4096 values, invsparse = 256 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +4096 nonzeroes left to fill.. +1491 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes +1024 slots to fill +all pairs to bucket 9, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff5447800 +inside enumify: 0x7f1ff5447800 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 9, no filling +done assigning buckets +bucket 9 has 1024 dots to do +LAUNCHING BUCKET CODE: 9 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<2,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.913408ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1010 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 28.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 1 + (5,560) 0 + (6,996) 0 + (7,183) 1 + (7,666) 1 + (8,896) 0 + (9,187) 0 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 0 + (12,953) 1 + (13,192) 1 + (14,421) 0 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 0 + (18,103) 1 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 1 + (21,975) 0 + (22,569) 1 + (23,310) 0 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 14 entries, memory: 16.4 KB + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 14 entries, memory: 704 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 14 entries, memory: 768 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 0 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 0 + (891,679) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 14 entries, memory: 656 bytes + + (99,326) 1 + (115,240) 1 + (176,614) 1 + (180,830) 1 + (343,678) 1 + (398,246) 1 + (411,643) 1 + (557,910) 1 + (590,95) 1 + 
(601,478) 1 + (623,44) 1 + (729,884) 1 + (825,406) 1 + (891,679) 1 + work:14 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +504 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 4096 values, invsparse = 256 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +4096 nonzeroes left to fill.. +1491 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes +1024 slots to fill +all pairs to bucket 10, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff542a400 +inside enumify: 0x7f1ff542a400 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 10, no filling +done assigning buckets +bucket 10 has 1024 dots to do +LAUNCHING BUCKET CODE: 10 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_vsvs +found memory-cached prog GB_jit_AxB_dot3_phase3_vsvs + got kernel instance AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_vsvs_int32_t_int32_t_int32_t +Launching _Z20AxB_dot3_phase3_vsvsIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<2,512,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 0.871424ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1010 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. 
M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 28.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 1 + (5,560) 0 + (6,996) 0 + (7,183) 1 + (7,666) 1 + (8,896) 0 + (9,187) 0 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 0 + (12,953) 1 + (13,192) 1 + (14,421) 0 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 0 + (18,103) 1 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 1 + (21,975) 0 + (22,569) 1 + (23,310) 0 + (24,905) 1 + ... + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 14 entries, memory: 16.4 KB + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 14 entries, memory: 704 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 1 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 1 + (891,679) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 14 entries, memory: 768 bytes + + (99,326) 0 + (115,240) 0 + (176,614) 0 + (180,830) 0 + (343,678) 0 + (398,246) 0 + (411,643) 0 + (557,910) 0 + (590,95) 0 + (601,478) 0 + (623,44) 0 + (729,884) 0 + (825,406) 0 + (891,679) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 14 entries, memory: 656 bytes + + (99,326) 1 + (115,240) 1 + (176,614) 1 + (180,830) 1 + (343,678) 1 + (398,246) 1 + (411,643) 1 + (557,910) 1 + (590,95) 1 + 
(601,478) 1 + (623,44) 1 + (729,884) 1 + (825,406) 1 + (891,679) 1 + work:14 gpus:0 Getting test data +Creating problem gen +filling matrices +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 4567 +fill_random nrows=1024ncols=1024 need 1024 values, invsparse = 1024 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +1024 nonzeroes left to fill.. +35 nonzeroes left to fill.. +inside fill, using seed 543210 +fill_random nrows=1024ncols=1024 need 5120 values, invsparse = 205 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +5120 nonzeroes left to fill.. +2091 nonzeroes left to fill.. + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes +inside fill, using seed 32 +fill_random nrows=1024ncols=1024 need 2048 values, invsparse = 512 +fill_random after alloc values +vdim ready +vlen ready +ready to fill p +filling sparse +2048 nonzeroes left to fill.. +569 nonzeroes left to fill.. 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes +1024 slots to fill +all pairs to bucket 11, no filling +done assigning buckets +Building semiring factgory + calling stringify semiring: 0x7f1ff542b400 +inside enumify: 0x7f1ff542b400 + + GraphBLAS Semiring: semiring (user-defined) + GraphBLAS Monoid: semiring->add (built-in) + GraphBLAS BinaryOp: monoid->op (built-in) z=plus(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 + identity: [ 0 ] + + GraphBLAS BinaryOp: semiring->multiply (built-in) z=times(x,y) + GraphBLAS type: ztype int32_t size: 4 + GraphBLAS type: xtype int32_t size: 4 + GraphBLAS type: ytype int32_t size: 4 +Getting semiring add +Getting semiring mult +Getting semiring add op +Getting types +Getting opcodes +Getting typecodes +Performing asserts +Invoking boolean rename +Invoking boolean rename +Invoking enumify binop +e 14 +Invoking enumify monoid +Calling enumify binop +Inside plus binop code +e 11 +Calling enumify identity +Calling enumify terminal +Done enumify monoid +Done invoking enumify monoid +atype +btype +ctype +Invoking enumify_mask, mtype 0x7f2028b56f40 +GB_enumify_mask gets mcode: 6 Mask_struct: 0 Mask_comp: 0 +got mask_ecode: 8 +constructing semiring scode +before: add_ecode: 11, id_ecode: 0, term_ecode: 31, mult_ecode: 14, flipxy: 0, zcode: 6, xcode: 6, ycode: 6, mask_ecode: 8, ccode: 6, acode: 6, bcode: 6, csparsity: 0, msparsity: 0, asparsity: 0, bsparsity: 0 +serialized_scode: 397409434378593792 +done enumify semiring +scode=397409434378593792 +done stringify semiring + returned from stringify semiring + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 8192 bytes +1024 slots to fill +all pairs to bucket 11, no filling +done assigning buckets +bucket 11 has 1024 dots to do +LAUNCHING BUCKET CODE: 11 +INside get cached file +looking for prog in file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +opening 
/home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h for write +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_semiring_397409434378593792.h +successful_read: 1 +Just closed + jit_cache get program GB_jit_AxB_dot3_phase3_mp +found memory-cached prog GB_jit_AxB_dot3_phase3_mp + got kernel instance AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +found memory-cached prog AxB_dot3_phase3_mp_int32_t_int32_t_int32_t +Launching _Z18AxB_dot3_phase3_mpIiiiEvxxPxP16GB_Matrix_opaqueS2_S2_S2_i<<<32,32,0,0>>>(long,long,long*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,GB_Matrix_opaque*,int) +warp 22 zombie count = 31, nzombies = 0 +warp 17 zombie count = 32, nzombies = 0 +warp 11 zombie count = 32, nzombies = 0 +warp 1 zombie count = 32, nzombies = 0 +warp 3 zombie count = 32, nzombies = 0 +warp 21 zombie count = 32, nzombies = 0 +warp 27 zombie count = 32, nzombies = 0 +warp 9 zombie count = 31, nzombies = 0 +warp 15 zombie count = 32, nzombies = 0 +warp 5 zombie count = 32, nzombies = 0 +warp 30 zombie count = 32, nzombies = 0 +warp 6 zombie count = 32, nzombies = 0 +warp 24 zombie count = 32, nzombies = 0 +warp 13 zombie count = 31, nzombies = 0 +warp 7 zombie count = 31, nzombies = 0 +warp 20 zombie count = 32, nzombies = 0 +warp 14 zombie count = 32, nzombies = 0 +warp 2 zombie count = 32, nzombies = 0 +warp 19 zombie count = 32, nzombies = 0 +warp 25 zombie count = 32, nzombies = 0 +warp 31 zombie count = 32, nzombies = 0 +warp 16 zombie count = 32, nzombies = 0 +warp 26 zombie count = 31, nzombies = 0 +warp 4 zombie count = 31, nzombies = 0 +warp 29 zombie count = 32, nzombies = 0 +warp 12 zombie count = 32, nzombies = 0 +warp 18 zombie count = 32, nzombies = 0 +warp 10 zombie count = 32, nzombies = 0 +warp 28 zombie count = 31, nzombies = 0 +warp 23 zombie count = 32, nzombies = 0 +warp 0 zombie count = 32, nzombies = 0 + Czombie = 64 + Czombie = 95 + Czombie = 127 + Czombie = 127 + Czombie = 191 + Czombie = 
254 + Czombie = 254 + Czombie = 254 + Czombie = 349 + Czombie = 349 + Czombie = 349 + Czombie = 381 + Czombie = 444 + Czombie = 444 + Czombie = 540 + Czombie = 540 + Czombie = 540 + Czombie = 636 + Czombie = 636 + Czombie = 636 + Czombie = 668 + Czombie = 731 + Czombie = 731 +warp 8 zombie count = 31, nzombies = 668 + Czombie = 762 + Czombie = 857 + Czombie = 857 + Czombie = 857 + Czombie = 953 + Czombie = 953 + Czombie = 953 + Czombie = 985 + Czombie = 1016 +Printing bucketp +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +Done. +returned from kernel 1.59642ms + + 1024x1024 GraphBLAS int32_t matrix, sparse (jumbled) by row + sparsity control: sparse only + C GPU, 1024 entries, memory: 28.2 KB + pending tuples: 0 max pending: 0 zombies: 1016 + + (0,478) zombie + (0,574) zombie + (2,376) zombie + (5,560) zombie + (6,996) zombie + (7,183) zombie + (7,666) zombie + (8,896) zombie + (9,187) zombie + (10,446) zombie + (11,46) zombie + (11,955) zombie + (12,397) zombie + (12,953) zombie + (13,192) zombie + (14,421) zombie + (15,568) zombie + (16,788) zombie + (16,904) zombie + (17,928) zombie + (18,103) zombie + (19,821) zombie + (19,886) zombie + (20,474) zombie + (21,479) zombie + (21,975) zombie + (22,569) zombie + (23,310) zombie + (24,905) zombie + ... + rmm_wrap_alloc 256 bytes +Not using cuda path. M_is_hypersparse: 0, A->iso: 0, B->iso: 0, A_BITMAP: 0, B_BITMAP: 0, GB_IS_FULL(A): 0, GB_IS_FULL(B): 0, semiring header size: 32 + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + M actual, 1024 entries, memory: 28.2 KB + + (0,478) 0 + (0,574) 0 + (2,376) 1 + (5,560) 0 + (6,996) 0 + (7,183) 1 + (7,666) 1 + (8,896) 0 + (9,187) 0 + (10,446) 1 + (11,46) 1 + (11,955) 1 + (12,397) 0 + (12,953) 1 + (13,192) 1 + (14,421) 0 + (15,568) 1 + (16,788) 1 + (16,904) 1 + (17,928) 0 + (18,103) 1 + (19,821) 0 + (19,886) 0 + (20,474) 0 + (21,479) 1 + (21,975) 0 + (22,569) 1 + (23,310) 0 + (24,905) 1 + ... 
+ rmm_wrap_alloc 16384 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + C GPU, 8 entries, memory: 16.3 KB + + (235,522) 1 + (309,328) 1 + (417,366) 0 + (565,490) 0 + (611,759) 0 + (714,475) 1 + (766,915) 0 + (877,722) 0 + + + 1024x1024 GraphBLAS int32_t matrix, hypersparse by row + C_actual, 8 entries, memory: 544 bytes + + (235,522) 1 + (309,328) 1 + (417,366) 0 + (565,490) 0 + (611,759) 0 + (714,475) 1 + (766,915) 0 + (877,722) 0 + + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 1024x1024 GraphBLAS double matrix, hypersparse by row + Diff actual, 8 entries, memory: 576 bytes + + (235,522) 0 + (309,328) 0 + (417,366) 0 + (565,490) 0 + (611,759) 0 + (714,475) 0 + (766,915) 0 + (877,722) 0 + + + 1024x1024 GraphBLAS bool matrix, hypersparse by row + T actual, 8 entries, memory: 520 bytes + + (235,522) 1 + (309,328) 1 + (417,366) 1 + (565,490) 1 + (611,759) 1 + (714,475) 1 + (766,915) 1 + (877,722) 1 + work:8 gpus:0 [ OK ] AxB_dot3_tests_PLUS_TIMES_3.smallxsmallPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t (480 ms) +[----------] 4 tests from AxB_dot3_tests_PLUS_TIMES_3 (3226 ms total) + +[----------] 4 tests from AxB_dot3_tests_PLUS_TIMES_4 +[ RUN ] AxB_dot3_tests_PLUS_TIMES_4.tinyxtinyPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + my mat, 32 entries, memory: 1.5 KB + + (0,1) 0 + (1,1) 1 + (2,1) 2 + (3,1) 3 + (4,1) 4 + (5,1) 5 + (6,1) 6 + (7,1) 7 + (8,1) 8 + (9,1) 9 + (10,1) 10 + (11,1) 11 + (12,1) 12 + (13,1) 13 + (14,1) 14 + (15,1) 15 + (16,1) 16 + (17,1) 17 + (18,1) 18 + (19,1) 19 + (20,1) 20 + 
(21,1) 21 + (22,1) 22 + (23,1) 23 + (24,1) 24 + (25,1) 25 + (26,1) 26 + (27,1) 27 + (28,1) 28 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + A, 32 entries, memory: 1.5 KB + + (0,1) 0 + (1,1) 1 + (2,1) 2 + (3,1) 3 + (4,1) 4 + (5,1) 5 + (6,1) 6 + (7,1) 7 + (8,1) 8 + (9,1) 9 + (10,1) 10 + (11,1) 11 + (12,1) 12 + (13,1) 13 + (14,1) 14 + (15,1) 15 + (16,1) 16 + (17,1) 17 + (18,1) 18 + (19,1) 19 + (20,1) 20 + (21,1) 21 + (22,1) 22 + (23,1) 23 + (24,1) 24 + (25,1) 25 + (26,1) 26 + (27,1) 27 + (28,1) 28 + (29,1) 29 + (30,1) 30 + (31,1) 31 + + jit_cache get program GB_jit_reduceNonZombiesWarp +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/GB_jit_reduceNonZombiesWarp + got kernel instance reduceNonZombiesWarp_int32_t_int32_t_true +about to close + read cache file /home/cjnolet/.SuiteSparse/GraphBLAS/6.3.0/reduceNonZombiesWarp_int32_t_int32_t_true +--------------------------------------- +--- Linker for void reduceNonZombiesWarp(GB_Matrix_opaque*, GB_Scalar_opaque*, unsigned int) --- +--------------------------------------- +info : 0 bytes gmem +info : Function properties for '_Z20reduceNonZombiesWarpIiiLb1EEvP16GB_Matrix_opaqueP16GB_Scalar_opaquej': +info : used 32 registers, 328 stack, 128 bytes smem, 372 bytes cmem[0], 0 bytes lmem + + +--------------------------------------- +Launching _Z20reduceNonZombiesWarpIiiLb1EEvP16GB_Matrix_opaqueP16GB_Scalar_opaquej<<<1,128,0,0>>>(GB_Matrix_opaque*,GB_Scalar_opaque*,unsigned int) +Sum: 496 +Invoking grb reduce + work:32 gpus:0 Done. +Results matched! 
+[ OK ] AxB_dot3_tests_PLUS_TIMES_4.tinyxtinyPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t (2 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_4.smallxsmallPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + my mat, 1024 entries, memory: 40.2 KB + + (0,1) 0 + (1,1) 1 + (2,1) 2 + (3,1) 3 + (4,1) 4 + (5,1) 5 + (6,1) 6 + (7,1) 7 + (8,1) 8 + (9,1) 9 + (10,1) 10 + (11,1) 11 + (12,1) 12 + (13,1) 13 + (14,1) 14 + (15,1) 15 + (16,1) 16 + (17,1) 17 + (18,1) 18 + (19,1) 19 + (20,1) 20 + (21,1) 21 + (22,1) 22 + (23,1) 23 + (24,1) 24 + (25,1) 25 + (26,1) 26 + (27,1) 27 + (28,1) 28 + ... + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + A, 1024 entries, memory: 40.2 KB + + (0,1) 0 + (1,1) 1 + (2,1) 2 + (3,1) 3 + (4,1) 4 + (5,1) 5 + (6,1) 6 + (7,1) 7 + (8,1) 8 + (9,1) 9 + (10,1) 10 + (11,1) 11 + (12,1) 12 + (13,1) 13 + (14,1) 14 + (15,1) 15 + (16,1) 16 + (17,1) 17 + (18,1) 18 + (19,1) 19 + (20,1) 20 + (21,1) 21 + (22,1) 22 + (23,1) 23 + (24,1) 24 + (25,1) 25 + (26,1) 26 + (27,1) 27 + (28,1) 28 + (29,1) 29 + (30,1) 30 + (31,1) 31 + (32,1) 32 + (33,1) 33 + (34,1) 34 + (35,1) 35 + (36,1) 36 + (37,1) 37 + (38,1) 38 + (39,1) 39 + (40,1) 40 + (41,1) 41 + (42,1) 42 + (43,1) 43 + (44,1) 44 + (45,1) 45 + (46,1) 46 + (47,1) 47 + (48,1) 48 + (49,1) 49 + (50,1) 50 + (51,1) 51 + (52,1) 52 + (53,1) 53 + (54,1) 54 + (55,1) 55 + (56,1) 56 + (57,1) 57 + (58,1) 58 + (59,1) 59 + (60,1) 60 + (61,1) 61 + (62,1) 62 + (63,1) 63 + (64,1) 64 + (65,1) 65 + (66,1) 66 + (67,1) 67 + (68,1) 68 + (69,1) 69 + (70,1) 70 + (71,1) 71 + (72,1) 72 + (73,1) 73 + (74,1) 74 + (75,1) 75 + (76,1) 76 + (77,1) 77 + (78,1) 78 + (79,1) 79 + (80,1) 80 + (81,1) 81 + (82,1) 82 + (83,1) 83 + (84,1) 84 + (85,1) 85 + (86,1) 86 + (87,1) 87 + (88,1) 88 + (89,1) 89 + (90,1) 90 + (91,1) 91 + (92,1) 92 + (93,1) 93 + (94,1) 94 + (95,1) 
95 + (96,1) 96 + (97,1) 97 + (98,1) 98 + (99,1) 99 + (100,1) 100 + (101,1) 101 + (102,1) 102 + (103,1) 103 + (104,1) 104 + (105,1) 105 + (106,1) 106 + (107,1) 107 + (108,1) 108 + (109,1) 109 + (110,1) 110 + (111,1) 111 + (112,1) 112 + (113,1) 113 + (114,1) 114 + (115,1) 115 + (116,1) 116 + (117,1) 117 + (118,1) 118 + (119,1) 119 + (120,1) 120 + (121,1) 121 + (122,1) 122 + (123,1) 123 + (124,1) 124 + (125,1) 125 + (126,1) 126 + (127,1) 127 + (128,1) 128 + (129,1) 129 + (130,1) 130 + (131,1) 131 + (132,1) 132 + (133,1) 133 + (134,1) 134 + (135,1) 135 + (136,1) 136 + (137,1) 137 + (138,1) 138 + (139,1) 139 + (140,1) 140 + (141,1) 141 + (142,1) 142 + (143,1) 143 + (144,1) 144 + (145,1) 145 + (146,1) 146 + (147,1) 147 + (148,1) 148 + (149,1) 149 + (150,1) 150 + (151,1) 151 + (152,1) 152 + (153,1) 153 + (154,1) 154 + (155,1) 155 + (156,1) 156 + (157,1) 157 + (158,1) 158 + (159,1) 159 + (160,1) 160 + (161,1) 161 + (162,1) 162 + (163,1) 163 + (164,1) 164 + (165,1) 165 + (166,1) 166 + (167,1) 167 + (168,1) 168 + (169,1) 169 + (170,1) 170 + (171,1) 171 + (172,1) 172 + (173,1) 173 + (174,1) 174 + (175,1) 175 + (176,1) 176 + (177,1) 177 + (178,1) 178 + (179,1) 179 + (180,1) 180 + (181,1) 181 + (182,1) 182 + (183,1) 183 + (184,1) 184 + (185,1) 185 + (186,1) 186 + (187,1) 187 + (188,1) 188 + (189,1) 189 + (190,1) 190 + (191,1) 191 + (192,1) 192 + (193,1) 193 + (194,1) 194 + (195,1) 195 + (196,1) 196 + (197,1) 197 + (198,1) 198 + (199,1) 199 + (200,1) 200 + (201,1) 201 + (202,1) 202 + (203,1) 203 + (204,1) 204 + (205,1) 205 + (206,1) 206 + (207,1) 207 + (208,1) 208 + (209,1) 209 + (210,1) 210 + (211,1) 211 + (212,1) 212 + (213,1) 213 + (214,1) 214 + (215,1) 215 + (216,1) 216 + (217,1) 217 + (218,1) 218 + (219,1) 219 + (220,1) 220 + (221,1) 221 + (222,1) 222 + (223,1) 223 + (224,1) 224 + (225,1) 225 + (226,1) 226 + (227,1) 227 + (228,1) 228 + (229,1) 229 + (230,1) 230 + (231,1) 231 + (232,1) 232 + (233,1) 233 + (234,1) 234 + (235,1) 235 + (236,1) 236 + (237,1) 237 + (238,1) 238 + 
(239,1) 239 + (240,1) 240 + (241,1) 241 + (242,1) 242 + (243,1) 243 + (244,1) 244 + (245,1) 245 + (246,1) 246 + (247,1) 247 + (248,1) 248 + (249,1) 249 + (250,1) 250 + (251,1) 251 + (252,1) 252 + (253,1) 253 + (254,1) 254 + (255,1) 255 + (256,1) 256 + (257,1) 257 + (258,1) 258 + (259,1) 259 + (260,1) 260 + (261,1) 261 + (262,1) 262 + (263,1) 263 + (264,1) 264 + (265,1) 265 + (266,1) 266 + (267,1) 267 + (268,1) 268 + (269,1) 269 + (270,1) 270 + (271,1) 271 + (272,1) 272 + (273,1) 273 + (274,1) 274 + (275,1) 275 + (276,1) 276 + (277,1) 277 + (278,1) 278 + (279,1) 279 + (280,1) 280 + (281,1) 281 + (282,1) 282 + (283,1) 283 + (284,1) 284 + (285,1) 285 + (286,1) 286 + (287,1) 287 + (288,1) 288 + (289,1) 289 + (290,1) 290 + (291,1) 291 + (292,1) 292 + (293,1) 293 + (294,1) 294 + (295,1) 295 + (296,1) 296 + (297,1) 297 + (298,1) 298 + (299,1) 299 + (300,1) 300 + (301,1) 301 + (302,1) 302 + (303,1) 303 + (304,1) 304 + (305,1) 305 + (306,1) 306 + (307,1) 307 + (308,1) 308 + (309,1) 309 + (310,1) 310 + (311,1) 311 + (312,1) 312 + (313,1) 313 + (314,1) 314 + (315,1) 315 + (316,1) 316 + (317,1) 317 + (318,1) 318 + (319,1) 319 + (320,1) 320 + (321,1) 321 + (322,1) 322 + (323,1) 323 + (324,1) 324 + (325,1) 325 + (326,1) 326 + (327,1) 327 + (328,1) 328 + (329,1) 329 + (330,1) 330 + (331,1) 331 + (332,1) 332 + (333,1) 333 + (334,1) 334 + (335,1) 335 + (336,1) 336 + (337,1) 337 + (338,1) 338 + (339,1) 339 + (340,1) 340 + (341,1) 341 + (342,1) 342 + (343,1) 343 + (344,1) 344 + (345,1) 345 + (346,1) 346 + (347,1) 347 + (348,1) 348 + (349,1) 349 + (350,1) 350 + (351,1) 351 + (352,1) 352 + (353,1) 353 + (354,1) 354 + (355,1) 355 + (356,1) 356 + (357,1) 357 + (358,1) 358 + (359,1) 359 + (360,1) 360 + (361,1) 361 + (362,1) 362 + (363,1) 363 + (364,1) 364 + (365,1) 365 + (366,1) 366 + (367,1) 367 + (368,1) 368 + (369,1) 369 + (370,1) 370 + (371,1) 371 + (372,1) 372 + (373,1) 373 + (374,1) 374 + (375,1) 375 + (376,1) 376 + (377,1) 377 + (378,1) 378 + (379,1) 379 + (380,1) 380 + (381,1) 381 
+ (382,1) 382 + (383,1) 383 + (384,1) 384 + (385,1) 385 + (386,1) 386 + (387,1) 387 + (388,1) 388 + (389,1) 389 + (390,1) 390 + (391,1) 391 + (392,1) 392 + (393,1) 393 + (394,1) 394 + (395,1) 395 + (396,1) 396 + (397,1) 397 + (398,1) 398 + (399,1) 399 + (400,1) 400 + (401,1) 401 + (402,1) 402 + (403,1) 403 + (404,1) 404 + (405,1) 405 + (406,1) 406 + (407,1) 407 + (408,1) 408 + (409,1) 409 + (410,1) 410 + (411,1) 411 + (412,1) 412 + (413,1) 413 + (414,1) 414 + (415,1) 415 + (416,1) 416 + (417,1) 417 + (418,1) 418 + (419,1) 419 + (420,1) 420 + (421,1) 421 + (422,1) 422 + (423,1) 423 + (424,1) 424 + (425,1) 425 + (426,1) 426 + (427,1) 427 + (428,1) 428 + (429,1) 429 + (430,1) 430 + (431,1) 431 + (432,1) 432 + (433,1) 433 + (434,1) 434 + (435,1) 435 + (436,1) 436 + (437,1) 437 + (438,1) 438 + (439,1) 439 + (440,1) 440 + (441,1) 441 + (442,1) 442 + (443,1) 443 + (444,1) 444 + (445,1) 445 + (446,1) 446 + (447,1) 447 + (448,1) 448 + (449,1) 449 + (450,1) 450 + (451,1) 451 + (452,1) 452 + (453,1) 453 + (454,1) 454 + (455,1) 455 + (456,1) 456 + (457,1) 457 + (458,1) 458 + (459,1) 459 + (460,1) 460 + (461,1) 461 + (462,1) 462 + (463,1) 463 + (464,1) 464 + (465,1) 465 + (466,1) 466 + (467,1) 467 + (468,1) 468 + (469,1) 469 + (470,1) 470 + (471,1) 471 + (472,1) 472 + (473,1) 473 + (474,1) 474 + (475,1) 475 + (476,1) 476 + (477,1) 477 + (478,1) 478 + (479,1) 479 + (480,1) 480 + (481,1) 481 + (482,1) 482 + (483,1) 483 + (484,1) 484 + (485,1) 485 + (486,1) 486 + (487,1) 487 + (488,1) 488 + (489,1) 489 + (490,1) 490 + (491,1) 491 + (492,1) 492 + (493,1) 493 + (494,1) 494 + (495,1) 495 + (496,1) 496 + (497,1) 497 + (498,1) 498 + (499,1) 499 + (500,1) 500 + (501,1) 501 + (502,1) 502 + (503,1) 503 + (504,1) 504 + (505,1) 505 + (506,1) 506 + (507,1) 507 + (508,1) 508 + (509,1) 509 + (510,1) 510 + (511,1) 511 + (512,1) 512 + (513,1) 513 + (514,1) 514 + (515,1) 515 + (516,1) 516 + (517,1) 517 + (518,1) 518 + (519,1) 519 + (520,1) 520 + (521,1) 521 + (522,1) 522 + (523,1) 523 + (524,1) 
524 + (525,1) 525 + (526,1) 526 + (527,1) 527 + (528,1) 528 + (529,1) 529 + (530,1) 530 + (531,1) 531 + (532,1) 532 + (533,1) 533 + (534,1) 534 + (535,1) 535 + (536,1) 536 + (537,1) 537 + (538,1) 538 + (539,1) 539 + (540,1) 540 + (541,1) 541 + (542,1) 542 + (543,1) 543 + (544,1) 544 + (545,1) 545 + (546,1) 546 + (547,1) 547 + (548,1) 548 + (549,1) 549 + (550,1) 550 + (551,1) 551 + (552,1) 552 + (553,1) 553 + (554,1) 554 + (555,1) 555 + (556,1) 556 + (557,1) 557 + (558,1) 558 + (559,1) 559 + (560,1) 560 + (561,1) 561 + (562,1) 562 + (563,1) 563 + (564,1) 564 + (565,1) 565 + (566,1) 566 + (567,1) 567 + (568,1) 568 + (569,1) 569 + (570,1) 570 + (571,1) 571 + (572,1) 572 + (573,1) 573 + (574,1) 574 + (575,1) 575 + (576,1) 576 + (577,1) 577 + (578,1) 578 + (579,1) 579 + (580,1) 580 + (581,1) 581 + (582,1) 582 + (583,1) 583 + (584,1) 584 + (585,1) 585 + (586,1) 586 + (587,1) 587 + (588,1) 588 + (589,1) 589 + (590,1) 590 + (591,1) 591 + (592,1) 592 + (593,1) 593 + (594,1) 594 + (595,1) 595 + (596,1) 596 + (597,1) 597 + (598,1) 598 + (599,1) 599 + (600,1) 600 + (601,1) 601 + (602,1) 602 + (603,1) 603 + (604,1) 604 + (605,1) 605 + (606,1) 606 + (607,1) 607 + (608,1) 608 + (609,1) 609 + (610,1) 610 + (611,1) 611 + (612,1) 612 + (613,1) 613 + (614,1) 614 + (615,1) 615 + (616,1) 616 + (617,1) 617 + (618,1) 618 + (619,1) 619 + (620,1) 620 + (621,1) 621 + (622,1) 622 + (623,1) 623 + (624,1) 624 + (625,1) 625 + (626,1) 626 + (627,1) 627 + (628,1) 628 + (629,1) 629 + (630,1) 630 + (631,1) 631 + (632,1) 632 + (633,1) 633 + (634,1) 634 + (635,1) 635 + (636,1) 636 + (637,1) 637 + (638,1) 638 + (639,1) 639 + (640,1) 640 + (641,1) 641 + (642,1) 642 + (643,1) 643 + (644,1) 644 + (645,1) 645 + (646,1) 646 + (647,1) 647 + (648,1) 648 + (649,1) 649 + (650,1) 650 + (651,1) 651 + (652,1) 652 + (653,1) 653 + (654,1) 654 + (655,1) 655 + (656,1) 656 + (657,1) 657 + (658,1) 658 + (659,1) 659 + (660,1) 660 + (661,1) 661 + (662,1) 662 + (663,1) 663 + (664,1) 664 + (665,1) 665 + (666,1) 666 + 
(667,1) 667 + (668,1) 668 + (669,1) 669 + (670,1) 670 + (671,1) 671 + (672,1) 672 + (673,1) 673 + (674,1) 674 + (675,1) 675 + (676,1) 676 + (677,1) 677 + (678,1) 678 + (679,1) 679 + (680,1) 680 + (681,1) 681 + (682,1) 682 + (683,1) 683 + (684,1) 684 + (685,1) 685 + (686,1) 686 + (687,1) 687 + (688,1) 688 + (689,1) 689 + (690,1) 690 + (691,1) 691 + (692,1) 692 + (693,1) 693 + (694,1) 694 + (695,1) 695 + (696,1) 696 + (697,1) 697 + (698,1) 698 + (699,1) 699 + (700,1) 700 + (701,1) 701 + (702,1) 702 + (703,1) 703 + (704,1) 704 + (705,1) 705 + (706,1) 706 + (707,1) 707 + (708,1) 708 + (709,1) 709 + (710,1) 710 + (711,1) 711 + (712,1) 712 + (713,1) 713 + (714,1) 714 + (715,1) 715 + (716,1) 716 + (717,1) 717 + (718,1) 718 + (719,1) 719 + (720,1) 720 + (721,1) 721 + (722,1) 722 + (723,1) 723 + (724,1) 724 + (725,1) 725 + (726,1) 726 + (727,1) 727 + (728,1) 728 + (729,1) 729 + (730,1) 730 + (731,1) 731 + (732,1) 732 + (733,1) 733 + (734,1) 734 + (735,1) 735 + (736,1) 736 + (737,1) 737 + (738,1) 738 + (739,1) 739 + (740,1) 740 + (741,1) 741 + (742,1) 742 + (743,1) 743 + (744,1) 744 + (745,1) 745 + (746,1) 746 + (747,1) 747 + (748,1) 748 + (749,1) 749 + (750,1) 750 + (751,1) 751 + (752,1) 752 + (753,1) 753 + (754,1) 754 + (755,1) 755 + (756,1) 756 + (757,1) 757 + (758,1) 758 + (759,1) 759 + (760,1) 760 + (761,1) 761 + (762,1) 762 + (763,1) 763 + (764,1) 764 + (765,1) 765 + (766,1) 766 + (767,1) 767 + (768,1) 768 + (769,1) 769 + (770,1) 770 + (771,1) 771 + (772,1) 772 + (773,1) 773 + (774,1) 774 + (775,1) 775 + (776,1) 776 + (777,1) 777 + (778,1) 778 + (779,1) 779 + (780,1) 780 + (781,1) 781 + (782,1) 782 + (783,1) 783 + (784,1) 784 + (785,1) 785 + (786,1) 786 + (787,1) 787 + (788,1) 788 + (789,1) 789 + (790,1) 790 + (791,1) 791 + (792,1) 792 + (793,1) 793 + (794,1) 794 + (795,1) 795 + (796,1) 796 + (797,1) 797 + (798,1) 798 + (799,1) 799 + (800,1) 800 + (801,1) 801 + (802,1) 802 + (803,1) 803 + (804,1) 804 + (805,1) 805 + (806,1) 806 + (807,1) 807 + (808,1) 808 + (809,1) 809 
+ (810,1) 810 + (811,1) 811 + (812,1) 812 + (813,1) 813 + (814,1) 814 + (815,1) 815 + (816,1) 816 + (817,1) 817 + (818,1) 818 + (819,1) 819 + (820,1) 820 + (821,1) 821 + (822,1) 822 + (823,1) 823 + (824,1) 824 + (825,1) 825 + (826,1) 826 + (827,1) 827 + (828,1) 828 + (829,1) 829 + (830,1) 830 + (831,1) 831 + (832,1) 832 + (833,1) 833 + (834,1) 834 + (835,1) 835 + (836,1) 836 + (837,1) 837 + (838,1) 838 + (839,1) 839 + (840,1) 840 + (841,1) 841 + (842,1) 842 + (843,1) 843 + (844,1) 844 + (845,1) 845 + (846,1) 846 + (847,1) 847 + (848,1) 848 + (849,1) 849 + (850,1) 850 + (851,1) 851 + (852,1) 852 + (853,1) 853 + (854,1) 854 + (855,1) 855 + (856,1) 856 + (857,1) 857 + (858,1) 858 + (859,1) 859 + (860,1) 860 + (861,1) 861 + (862,1) 862 + (863,1) 863 + (864,1) 864 + (865,1) 865 + (866,1) 866 + (867,1) 867 + (868,1) 868 + (869,1) 869 + (870,1) 870 + (871,1) 871 + (872,1) 872 + (873,1) 873 + (874,1) 874 + (875,1) 875 + (876,1) 876 + (877,1) 877 + (878,1) 878 + (879,1) 879 + (880,1) 880 + (881,1) 881 + (882,1) 882 + (883,1) 883 + (884,1) 884 + (885,1) 885 + (886,1) 886 + (887,1) 887 + (888,1) 888 + (889,1) 889 + (890,1) 890 + (891,1) 891 + (892,1) 892 + (893,1) 893 + (894,1) 894 + (895,1) 895 + (896,1) 896 + (897,1) 897 + (898,1) 898 + (899,1) 899 + (900,1) 900 + (901,1) 901 + (902,1) 902 + (903,1) 903 + (904,1) 904 + (905,1) 905 + (906,1) 906 + (907,1) 907 + (908,1) 908 + (909,1) 909 + (910,1) 910 + (911,1) 911 + (912,1) 912 + (913,1) 913 + (914,1) 914 + (915,1) 915 + (916,1) 916 + (917,1) 917 + (918,1) 918 + (919,1) 919 + (920,1) 920 + (921,1) 921 + (922,1) 922 + (923,1) 923 + (924,1) 924 + (925,1) 925 + (926,1) 926 + (927,1) 927 + (928,1) 928 + (929,1) 929 + (930,1) 930 + (931,1) 931 + (932,1) 932 + (933,1) 933 + (934,1) 934 + (935,1) 935 + (936,1) 936 + (937,1) 937 + (938,1) 938 + (939,1) 939 + (940,1) 940 + (941,1) 941 + (942,1) 942 + (943,1) 943 + (944,1) 944 + (945,1) 945 + (946,1) 946 + (947,1) 947 + (948,1) 948 + (949,1) 949 + (950,1) 950 + (951,1) 951 + (952,1) 
952 + (953,1) 953 + (954,1) 954 + (955,1) 955 + (956,1) 956 + (957,1) 957 + (958,1) 958 + (959,1) 959 + (960,1) 960 + (961,1) 961 + (962,1) 962 + (963,1) 963 + (964,1) 964 + (965,1) 965 + (966,1) 966 + (967,1) 967 + (968,1) 968 + (969,1) 969 + (970,1) 970 + (971,1) 971 + (972,1) 972 + (973,1) 973 + (974,1) 974 + (975,1) 975 + (976,1) 976 + (977,1) 977 + (978,1) 978 + (979,1) 979 + (980,1) 980 + (981,1) 981 + (982,1) 982 + (983,1) 983 + (984,1) 984 + (985,1) 985 + (986,1) 986 + (987,1) 987 + (988,1) 988 + (989,1) 989 + (990,1) 990 + (991,1) 991 + (992,1) 992 + (993,1) 993 + (994,1) 994 + (995,1) 995 + (996,1) 996 + (997,1) 997 + (998,1) 998 + (999,1) 999 + (1000,1) 1000 + (1001,1) 1001 + (1002,1) 1002 + (1003,1) 1003 + (1004,1) 1004 + (1005,1) 1005 + (1006,1) 1006 + (1007,1) 1007 + (1008,1) 1008 + (1009,1) 1009 + (1010,1) 1010 + (1011,1) 1011 + (1012,1) 1012 + (1013,1) 1013 + (1014,1) 1014 + (1015,1) 1015 + (1016,1) 1016 + (1017,1) 1017 + (1018,1) 1018 + (1019,1) 1019 + (1020,1) 1020 + (1021,1) 1021 + (1022,1) 1022 + (1023,1) 1023 + + jit_cache get program GB_jit_reduceNonZombiesWarp +found memory-cached prog GB_jit_reduceNonZombiesWarp + got kernel instance reduceNonZombiesWarp_int32_t_int32_t_true +found memory-cached prog reduceNonZombiesWarp_int32_t_int32_t_true +Launching _Z20reduceNonZombiesWarpIiiLb1EEvP16GB_Matrix_opaqueP16GB_Scalar_opaquej<<<8,128,0,0>>>(GB_Matrix_opaque*,GB_Scalar_opaque*,unsigned int) +Sum: 523776 +Invoking grb reduce + rmm_wrap_alloc 256 bytes + work:1024 gpus:0 Done. +Results matched! 
+[ OK ] AxB_dot3_tests_PLUS_TIMES_4.smallxsmallPLUS_TIMES_Cint32_tMboolAint32_tBint32_tXint32_tYint32_tZint32_t (5 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_4.tinyxtinyPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + my mat, 32 entries, memory: 1.5 KB + + (0,1) 0 + (1,1) 1 + (2,1) 2 + (3,1) 3 + (4,1) 4 + (5,1) 5 + (6,1) 6 + (7,1) 7 + (8,1) 8 + (9,1) 9 + (10,1) 10 + (11,1) 11 + (12,1) 12 + (13,1) 13 + (14,1) 14 + (15,1) 15 + (16,1) 16 + (17,1) 17 + (18,1) 18 + (19,1) 19 + (20,1) 20 + (21,1) 21 + (22,1) 22 + (23,1) 23 + (24,1) 24 + (25,1) 25 + (26,1) 26 + (27,1) 27 + (28,1) 28 + ... + + 32x32 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + A, 32 entries, memory: 1.5 KB + + (0,1) 0 + (1,1) 1 + (2,1) 2 + (3,1) 3 + (4,1) 4 + (5,1) 5 + (6,1) 6 + (7,1) 7 + (8,1) 8 + (9,1) 9 + (10,1) 10 + (11,1) 11 + (12,1) 12 + (13,1) 13 + (14,1) 14 + (15,1) 15 + (16,1) 16 + (17,1) 17 + (18,1) 18 + (19,1) 19 + (20,1) 20 + (21,1) 21 + (22,1) 22 + (23,1) 23 + (24,1) 24 + (25,1) 25 + (26,1) 26 + (27,1) 27 + (28,1) 28 + (29,1) 29 + (30,1) 30 + (31,1) 31 + + jit_cache get program GB_jit_reduceNonZombiesWarp +found memory-cached prog GB_jit_reduceNonZombiesWarp + got kernel instance reduceNonZombiesWarp_int32_t_int32_t_true +found memory-cached prog reduceNonZombiesWarp_int32_t_int32_t_true +Launching _Z20reduceNonZombiesWarpIiiLb1EEvP16GB_Matrix_opaqueP16GB_Scalar_opaquej<<<1,128,0,0>>>(GB_Matrix_opaque*,GB_Scalar_opaque*,unsigned int) +Sum: 496 +Invoking grb reduce + work:32 gpus:0 Done. +Results matched! 
+[ OK ] AxB_dot3_tests_PLUS_TIMES_4.tinyxtinyPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t (0 ms) +[ RUN ] AxB_dot3_tests_PLUS_TIMES_4.smallxsmallPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t + rmm_wrap_alloc 256 bytes + rmm_wrap_alloc 512 bytes + rmm_wrap_alloc 16384 bytes + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + my mat, 1024 entries, memory: 40.2 KB + + (0,1) 0 + (1,1) 1 + (2,1) 2 + (3,1) 3 + (4,1) 4 + (5,1) 5 + (6,1) 6 + (7,1) 7 + (8,1) 8 + (9,1) 9 + (10,1) 10 + (11,1) 11 + (12,1) 12 + (13,1) 13 + (14,1) 14 + (15,1) 15 + (16,1) 16 + (17,1) 17 + (18,1) 18 + (19,1) 19 + (20,1) 20 + (21,1) 21 + (22,1) 22 + (23,1) 23 + (24,1) 24 + (25,1) 25 + (26,1) 26 + (27,1) 27 + (28,1) 28 + ... + + 1024x1024 GraphBLAS int32_t matrix, sparse by row + sparsity control: sparse only + A, 1024 entries, memory: 40.2 KB + + (0,1) 0 + (1,1) 1 + (2,1) 2 + (3,1) 3 + (4,1) 4 + (5,1) 5 + (6,1) 6 + (7,1) 7 + (8,1) 8 + (9,1) 9 + (10,1) 10 + (11,1) 11 + (12,1) 12 + (13,1) 13 + (14,1) 14 + (15,1) 15 + (16,1) 16 + (17,1) 17 + (18,1) 18 + (19,1) 19 + (20,1) 20 + (21,1) 21 + (22,1) 22 + (23,1) 23 + (24,1) 24 + (25,1) 25 + (26,1) 26 + (27,1) 27 + (28,1) 28 + (29,1) 29 + (30,1) 30 + (31,1) 31 + (32,1) 32 + (33,1) 33 + (34,1) 34 + (35,1) 35 + (36,1) 36 + (37,1) 37 + (38,1) 38 + (39,1) 39 + (40,1) 40 + (41,1) 41 + (42,1) 42 + (43,1) 43 + (44,1) 44 + (45,1) 45 + (46,1) 46 + (47,1) 47 + (48,1) 48 + (49,1) 49 + (50,1) 50 + (51,1) 51 + (52,1) 52 + (53,1) 53 + (54,1) 54 + (55,1) 55 + (56,1) 56 + (57,1) 57 + (58,1) 58 + (59,1) 59 + (60,1) 60 + (61,1) 61 + (62,1) 62 + (63,1) 63 + (64,1) 64 + (65,1) 65 + (66,1) 66 + (67,1) 67 + (68,1) 68 + (69,1) 69 + (70,1) 70 + (71,1) 71 + (72,1) 72 + (73,1) 73 + (74,1) 74 + (75,1) 75 + (76,1) 76 + (77,1) 77 + (78,1) 78 + (79,1) 79 + (80,1) 80 + (81,1) 81 + (82,1) 82 + (83,1) 83 + (84,1) 84 + (85,1) 85 + (86,1) 86 + (87,1) 87 + (88,1) 88 + (89,1) 89 + (90,1) 90 + (91,1) 91 + (92,1) 92 
+ (93,1) 93 + (94,1) 94 + (95,1) 95 + (96,1) 96 + (97,1) 97 + (98,1) 98 + (99,1) 99 + (100,1) 100 + (101,1) 101 + (102,1) 102 + (103,1) 103 + (104,1) 104 + (105,1) 105 + (106,1) 106 + (107,1) 107 + (108,1) 108 + (109,1) 109 + (110,1) 110 + (111,1) 111 + (112,1) 112 + (113,1) 113 + (114,1) 114 + (115,1) 115 + (116,1) 116 + (117,1) 117 + (118,1) 118 + (119,1) 119 + (120,1) 120 + (121,1) 121 + (122,1) 122 + (123,1) 123 + (124,1) 124 + (125,1) 125 + (126,1) 126 + (127,1) 127 + (128,1) 128 + (129,1) 129 + (130,1) 130 + (131,1) 131 + (132,1) 132 + (133,1) 133 + (134,1) 134 + (135,1) 135 + (136,1) 136 + (137,1) 137 + (138,1) 138 + (139,1) 139 + (140,1) 140 + (141,1) 141 + (142,1) 142 + (143,1) 143 + (144,1) 144 + (145,1) 145 + (146,1) 146 + (147,1) 147 + (148,1) 148 + (149,1) 149 + (150,1) 150 + (151,1) 151 + (152,1) 152 + (153,1) 153 + (154,1) 154 + (155,1) 155 + (156,1) 156 + (157,1) 157 + (158,1) 158 + (159,1) 159 + (160,1) 160 + (161,1) 161 + (162,1) 162 + (163,1) 163 + (164,1) 164 + (165,1) 165 + (166,1) 166 + (167,1) 167 + (168,1) 168 + (169,1) 169 + (170,1) 170 + (171,1) 171 + (172,1) 172 + (173,1) 173 + (174,1) 174 + (175,1) 175 + (176,1) 176 + (177,1) 177 + (178,1) 178 + (179,1) 179 + (180,1) 180 + (181,1) 181 + (182,1) 182 + (183,1) 183 + (184,1) 184 + (185,1) 185 + (186,1) 186 + (187,1) 187 + (188,1) 188 + (189,1) 189 + (190,1) 190 + (191,1) 191 + (192,1) 192 + (193,1) 193 + (194,1) 194 + (195,1) 195 + (196,1) 196 + (197,1) 197 + (198,1) 198 + (199,1) 199 + (200,1) 200 + (201,1) 201 + (202,1) 202 + (203,1) 203 + (204,1) 204 + (205,1) 205 + (206,1) 206 + (207,1) 207 + (208,1) 208 + (209,1) 209 + (210,1) 210 + (211,1) 211 + (212,1) 212 + (213,1) 213 + (214,1) 214 + (215,1) 215 + (216,1) 216 + (217,1) 217 + (218,1) 218 + (219,1) 219 + (220,1) 220 + (221,1) 221 + (222,1) 222 + (223,1) 223 + (224,1) 224 + (225,1) 225 + (226,1) 226 + (227,1) 227 + (228,1) 228 + (229,1) 229 + (230,1) 230 + (231,1) 231 + (232,1) 232 + (233,1) 233 + (234,1) 234 + (235,1) 235 + (236,1) 
236 + (237,1) 237 + (238,1) 238 + (239,1) 239 + (240,1) 240 + (241,1) 241 + (242,1) 242 + (243,1) 243 + (244,1) 244 + (245,1) 245 + (246,1) 246 + (247,1) 247 + (248,1) 248 + (249,1) 249 + (250,1) 250 + (251,1) 251 + (252,1) 252 + (253,1) 253 + (254,1) 254 + (255,1) 255 + (256,1) 256 + (257,1) 257 + (258,1) 258 + (259,1) 259 + (260,1) 260 + (261,1) 261 + (262,1) 262 + (263,1) 263 + (264,1) 264 + (265,1) 265 + (266,1) 266 + (267,1) 267 + (268,1) 268 + (269,1) 269 + (270,1) 270 + (271,1) 271 + (272,1) 272 + (273,1) 273 + (274,1) 274 + (275,1) 275 + (276,1) 276 + (277,1) 277 + (278,1) 278 + (279,1) 279 + (280,1) 280 + (281,1) 281 + (282,1) 282 + (283,1) 283 + (284,1) 284 + (285,1) 285 + (286,1) 286 + (287,1) 287 + (288,1) 288 + (289,1) 289 + (290,1) 290 + (291,1) 291 + (292,1) 292 + (293,1) 293 + (294,1) 294 + (295,1) 295 + (296,1) 296 + (297,1) 297 + (298,1) 298 + (299,1) 299 + (300,1) 300 + (301,1) 301 + (302,1) 302 + (303,1) 303 + (304,1) 304 + (305,1) 305 + (306,1) 306 + (307,1) 307 + (308,1) 308 + (309,1) 309 + (310,1) 310 + (311,1) 311 + (312,1) 312 + (313,1) 313 + (314,1) 314 + (315,1) 315 + (316,1) 316 + (317,1) 317 + (318,1) 318 + (319,1) 319 + (320,1) 320 + (321,1) 321 + (322,1) 322 + (323,1) 323 + (324,1) 324 + (325,1) 325 + (326,1) 326 + (327,1) 327 + (328,1) 328 + (329,1) 329 + (330,1) 330 + (331,1) 331 + (332,1) 332 + (333,1) 333 + (334,1) 334 + (335,1) 335 + (336,1) 336 + (337,1) 337 + (338,1) 338 + (339,1) 339 + (340,1) 340 + (341,1) 341 + (342,1) 342 + (343,1) 343 + (344,1) 344 + (345,1) 345 + (346,1) 346 + (347,1) 347 + (348,1) 348 + (349,1) 349 + (350,1) 350 + (351,1) 351 + (352,1) 352 + (353,1) 353 + (354,1) 354 + (355,1) 355 + (356,1) 356 + (357,1) 357 + (358,1) 358 + (359,1) 359 + (360,1) 360 + (361,1) 361 + (362,1) 362 + (363,1) 363 + (364,1) 364 + (365,1) 365 + (366,1) 366 + (367,1) 367 + (368,1) 368 + (369,1) 369 + (370,1) 370 + (371,1) 371 + (372,1) 372 + (373,1) 373 + (374,1) 374 + (375,1) 375 + (376,1) 376 + (377,1) 377 + (378,1) 378 + 
(379,1) 379 + (380,1) 380 + (381,1) 381 + (382,1) 382 + (383,1) 383 + (384,1) 384 + (385,1) 385 + (386,1) 386 + (387,1) 387 + (388,1) 388 + (389,1) 389 + (390,1) 390 + (391,1) 391 + (392,1) 392 + (393,1) 393 + (394,1) 394 + (395,1) 395 + (396,1) 396 + (397,1) 397 + (398,1) 398 + (399,1) 399 + (400,1) 400 + (401,1) 401 + (402,1) 402 + (403,1) 403 + (404,1) 404 + (405,1) 405 + (406,1) 406 + (407,1) 407 + (408,1) 408 + (409,1) 409 + (410,1) 410 + (411,1) 411 + (412,1) 412 + (413,1) 413 + (414,1) 414 + (415,1) 415 + (416,1) 416 + (417,1) 417 + (418,1) 418 + (419,1) 419 + (420,1) 420 + (421,1) 421 + (422,1) 422 + (423,1) 423 + (424,1) 424 + (425,1) 425 + (426,1) 426 + (427,1) 427 + (428,1) 428 + (429,1) 429 + (430,1) 430 + (431,1) 431 + (432,1) 432 + (433,1) 433 + (434,1) 434 + (435,1) 435 + (436,1) 436 + (437,1) 437 + (438,1) 438 + (439,1) 439 + (440,1) 440 + (441,1) 441 + (442,1) 442 + (443,1) 443 + (444,1) 444 + (445,1) 445 + (446,1) 446 + (447,1) 447 + (448,1) 448 + (449,1) 449 + (450,1) 450 + (451,1) 451 + (452,1) 452 + (453,1) 453 + (454,1) 454 + (455,1) 455 + (456,1) 456 + (457,1) 457 + (458,1) 458 + (459,1) 459 + (460,1) 460 + (461,1) 461 + (462,1) 462 + (463,1) 463 + (464,1) 464 + (465,1) 465 + (466,1) 466 + (467,1) 467 + (468,1) 468 + (469,1) 469 + (470,1) 470 + (471,1) 471 + (472,1) 472 + (473,1) 473 + (474,1) 474 + (475,1) 475 + (476,1) 476 + (477,1) 477 + (478,1) 478 + (479,1) 479 + (480,1) 480 + (481,1) 481 + (482,1) 482 + (483,1) 483 + (484,1) 484 + (485,1) 485 + (486,1) 486 + (487,1) 487 + (488,1) 488 + (489,1) 489 + (490,1) 490 + (491,1) 491 + (492,1) 492 + (493,1) 493 + (494,1) 494 + (495,1) 495 + (496,1) 496 + (497,1) 497 + (498,1) 498 + (499,1) 499 + (500,1) 500 + (501,1) 501 + (502,1) 502 + (503,1) 503 + (504,1) 504 + (505,1) 505 + (506,1) 506 + (507,1) 507 + (508,1) 508 + (509,1) 509 + (510,1) 510 + (511,1) 511 + (512,1) 512 + (513,1) 513 + (514,1) 514 + (515,1) 515 + (516,1) 516 + (517,1) 517 + (518,1) 518 + (519,1) 519 + (520,1) 520 + (521,1) 521 
+ (522,1) 522 + (523,1) 523 + (524,1) 524 + (525,1) 525 + (526,1) 526 + (527,1) 527 + (528,1) 528 + (529,1) 529 + (530,1) 530 + (531,1) 531 + (532,1) 532 + (533,1) 533 + (534,1) 534 + (535,1) 535 + (536,1) 536 + (537,1) 537 + (538,1) 538 + (539,1) 539 + (540,1) 540 + (541,1) 541 + (542,1) 542 + (543,1) 543 + (544,1) 544 + (545,1) 545 + (546,1) 546 + (547,1) 547 + (548,1) 548 + (549,1) 549 + (550,1) 550 + (551,1) 551 + (552,1) 552 + (553,1) 553 + (554,1) 554 + (555,1) 555 + (556,1) 556 + (557,1) 557 + (558,1) 558 + (559,1) 559 + (560,1) 560 + (561,1) 561 + (562,1) 562 + (563,1) 563 + (564,1) 564 + (565,1) 565 + (566,1) 566 + (567,1) 567 + (568,1) 568 + (569,1) 569 + (570,1) 570 + (571,1) 571 + (572,1) 572 + (573,1) 573 + (574,1) 574 + (575,1) 575 + (576,1) 576 + (577,1) 577 + (578,1) 578 + (579,1) 579 + (580,1) 580 + (581,1) 581 + (582,1) 582 + (583,1) 583 + (584,1) 584 + (585,1) 585 + (586,1) 586 + (587,1) 587 + (588,1) 588 + (589,1) 589 + (590,1) 590 + (591,1) 591 + (592,1) 592 + (593,1) 593 + (594,1) 594 + (595,1) 595 + (596,1) 596 + (597,1) 597 + (598,1) 598 + (599,1) 599 + (600,1) 600 + (601,1) 601 + (602,1) 602 + (603,1) 603 + (604,1) 604 + (605,1) 605 + (606,1) 606 + (607,1) 607 + (608,1) 608 + (609,1) 609 + (610,1) 610 + (611,1) 611 + (612,1) 612 + (613,1) 613 + (614,1) 614 + (615,1) 615 + (616,1) 616 + (617,1) 617 + (618,1) 618 + (619,1) 619 + (620,1) 620 + (621,1) 621 + (622,1) 622 + (623,1) 623 + (624,1) 624 + (625,1) 625 + (626,1) 626 + (627,1) 627 + (628,1) 628 + (629,1) 629 + (630,1) 630 + (631,1) 631 + (632,1) 632 + (633,1) 633 + (634,1) 634 + (635,1) 635 + (636,1) 636 + (637,1) 637 + (638,1) 638 + (639,1) 639 + (640,1) 640 + (641,1) 641 + (642,1) 642 + (643,1) 643 + (644,1) 644 + (645,1) 645 + (646,1) 646 + (647,1) 647 + (648,1) 648 + (649,1) 649 + (650,1) 650 + (651,1) 651 + (652,1) 652 + (653,1) 653 + (654,1) 654 + (655,1) 655 + (656,1) 656 + (657,1) 657 + (658,1) 658 + (659,1) 659 + (660,1) 660 + (661,1) 661 + (662,1) 662 + (663,1) 663 + (664,1) 
664 + (665,1) 665 + (666,1) 666 + (667,1) 667 + (668,1) 668 + (669,1) 669 + (670,1) 670 + (671,1) 671 + (672,1) 672 + (673,1) 673 + (674,1) 674 + (675,1) 675 + (676,1) 676 + (677,1) 677 + (678,1) 678 + (679,1) 679 + (680,1) 680 + (681,1) 681 + (682,1) 682 + (683,1) 683 + (684,1) 684 + (685,1) 685 + (686,1) 686 + (687,1) 687 + (688,1) 688 + (689,1) 689 + (690,1) 690 + (691,1) 691 + (692,1) 692 + (693,1) 693 + (694,1) 694 + (695,1) 695 + (696,1) 696 + (697,1) 697 + (698,1) 698 + (699,1) 699 + (700,1) 700 + (701,1) 701 + (702,1) 702 + (703,1) 703 + (704,1) 704 + (705,1) 705 + (706,1) 706 + (707,1) 707 + (708,1) 708 + (709,1) 709 + (710,1) 710 + (711,1) 711 + (712,1) 712 + (713,1) 713 + (714,1) 714 + (715,1) 715 + (716,1) 716 + (717,1) 717 + (718,1) 718 + (719,1) 719 + (720,1) 720 + (721,1) 721 + (722,1) 722 + (723,1) 723 + (724,1) 724 + (725,1) 725 + (726,1) 726 + (727,1) 727 + (728,1) 728 + (729,1) 729 + (730,1) 730 + (731,1) 731 + (732,1) 732 + (733,1) 733 + (734,1) 734 + (735,1) 735 + (736,1) 736 + (737,1) 737 + (738,1) 738 + (739,1) 739 + (740,1) 740 + (741,1) 741 + (742,1) 742 + (743,1) 743 + (744,1) 744 + (745,1) 745 + (746,1) 746 + (747,1) 747 + (748,1) 748 + (749,1) 749 + (750,1) 750 + (751,1) 751 + (752,1) 752 + (753,1) 753 + (754,1) 754 + (755,1) 755 + (756,1) 756 + (757,1) 757 + (758,1) 758 + (759,1) 759 + (760,1) 760 + (761,1) 761 + (762,1) 762 + (763,1) 763 + (764,1) 764 + (765,1) 765 + (766,1) 766 + (767,1) 767 + (768,1) 768 + (769,1) 769 + (770,1) 770 + (771,1) 771 + (772,1) 772 + (773,1) 773 + (774,1) 774 + (775,1) 775 + (776,1) 776 + (777,1) 777 + (778,1) 778 + (779,1) 779 + (780,1) 780 + (781,1) 781 + (782,1) 782 + (783,1) 783 + (784,1) 784 + (785,1) 785 + (786,1) 786 + (787,1) 787 + (788,1) 788 + (789,1) 789 + (790,1) 790 + (791,1) 791 + (792,1) 792 + (793,1) 793 + (794,1) 794 + (795,1) 795 + (796,1) 796 + (797,1) 797 + (798,1) 798 + (799,1) 799 + (800,1) 800 + (801,1) 801 + (802,1) 802 + (803,1) 803 + (804,1) 804 + (805,1) 805 + (806,1) 806 + 
(807,1) 807 + (808,1) 808 + (809,1) 809 + (810,1) 810 + (811,1) 811 + (812,1) 812 + (813,1) 813 + (814,1) 814 + (815,1) 815 + (816,1) 816 + (817,1) 817 + (818,1) 818 + (819,1) 819 + (820,1) 820 + (821,1) 821 + (822,1) 822 + (823,1) 823 + (824,1) 824 + (825,1) 825 + (826,1) 826 + (827,1) 827 + (828,1) 828 + (829,1) 829 + (830,1) 830 + (831,1) 831 + (832,1) 832 + (833,1) 833 + (834,1) 834 + (835,1) 835 + (836,1) 836 + (837,1) 837 + (838,1) 838 + (839,1) 839 + (840,1) 840 + (841,1) 841 + (842,1) 842 + (843,1) 843 + (844,1) 844 + (845,1) 845 + (846,1) 846 + (847,1) 847 + (848,1) 848 + (849,1) 849 + (850,1) 850 + (851,1) 851 + (852,1) 852 + (853,1) 853 + (854,1) 854 + (855,1) 855 + (856,1) 856 + (857,1) 857 + (858,1) 858 + (859,1) 859 + (860,1) 860 + (861,1) 861 + (862,1) 862 + (863,1) 863 + (864,1) 864 + (865,1) 865 + (866,1) 866 + (867,1) 867 + (868,1) 868 + (869,1) 869 + (870,1) 870 + (871,1) 871 + (872,1) 872 + (873,1) 873 + (874,1) 874 + (875,1) 875 + (876,1) 876 + (877,1) 877 + (878,1) 878 + (879,1) 879 + (880,1) 880 + (881,1) 881 + (882,1) 882 + (883,1) 883 + (884,1) 884 + (885,1) 885 + (886,1) 886 + (887,1) 887 + (888,1) 888 + (889,1) 889 + (890,1) 890 + (891,1) 891 + (892,1) 892 + (893,1) 893 + (894,1) 894 + (895,1) 895 + (896,1) 896 + (897,1) 897 + (898,1) 898 + (899,1) 899 + (900,1) 900 + (901,1) 901 + (902,1) 902 + (903,1) 903 + (904,1) 904 + (905,1) 905 + (906,1) 906 + (907,1) 907 + (908,1) 908 + (909,1) 909 + (910,1) 910 + (911,1) 911 + (912,1) 912 + (913,1) 913 + (914,1) 914 + (915,1) 915 + (916,1) 916 + (917,1) 917 + (918,1) 918 + (919,1) 919 + (920,1) 920 + (921,1) 921 + (922,1) 922 + (923,1) 923 + (924,1) 924 + (925,1) 925 + (926,1) 926 + (927,1) 927 + (928,1) 928 + (929,1) 929 + (930,1) 930 + (931,1) 931 + (932,1) 932 + (933,1) 933 + (934,1) 934 + (935,1) 935 + (936,1) 936 + (937,1) 937 + (938,1) 938 + (939,1) 939 + (940,1) 940 + (941,1) 941 + (942,1) 942 + (943,1) 943 + (944,1) 944 + (945,1) 945 + (946,1) 946 + (947,1) 947 + (948,1) 948 + (949,1) 949 
+ (950,1) 950 + (951,1) 951 + (952,1) 952 + (953,1) 953 + (954,1) 954 + (955,1) 955 + (956,1) 956 + (957,1) 957 + (958,1) 958 + (959,1) 959 + (960,1) 960 + (961,1) 961 + (962,1) 962 + (963,1) 963 + (964,1) 964 + (965,1) 965 + (966,1) 966 + (967,1) 967 + (968,1) 968 + (969,1) 969 + (970,1) 970 + (971,1) 971 + (972,1) 972 + (973,1) 973 + (974,1) 974 + (975,1) 975 + (976,1) 976 + (977,1) 977 + (978,1) 978 + (979,1) 979 + (980,1) 980 + (981,1) 981 + (982,1) 982 + (983,1) 983 + (984,1) 984 + (985,1) 985 + (986,1) 986 + (987,1) 987 + (988,1) 988 + (989,1) 989 + (990,1) 990 + (991,1) 991 + (992,1) 992 + (993,1) 993 + (994,1) 994 + (995,1) 995 + (996,1) 996 + (997,1) 997 + (998,1) 998 + (999,1) 999 + (1000,1) 1000 + (1001,1) 1001 + (1002,1) 1002 + (1003,1) 1003 + (1004,1) 1004 + (1005,1) 1005 + (1006,1) 1006 + (1007,1) 1007 + (1008,1) 1008 + (1009,1) 1009 + (1010,1) 1010 + (1011,1) 1011 + (1012,1) 1012 + (1013,1) 1013 + (1014,1) 1014 + (1015,1) 1015 + (1016,1) 1016 + (1017,1) 1017 + (1018,1) 1018 + (1019,1) 1019 + (1020,1) 1020 + (1021,1) 1021 + (1022,1) 1022 + (1023,1) 1023 + + jit_cache get program GB_jit_reduceNonZombiesWarp +found memory-cached prog GB_jit_reduceNonZombiesWarp + got kernel instance reduceNonZombiesWarp_int32_t_int32_t_true +found memory-cached prog reduceNonZombiesWarp_int32_t_int32_t_true +Launching _Z20reduceNonZombiesWarpIiiLb1EEvP16GB_Matrix_opaqueP16GB_Scalar_opaquej<<<8,128,0,0>>>(GB_Matrix_opaque*,GB_Scalar_opaque*,unsigned int) +Sum: 523776 +Invoking grb reduce + rmm_wrap_alloc 256 bytes + work:1024 gpus:0 Done. +Results matched! +[ OK ] AxB_dot3_tests_PLUS_TIMES_4.smallxsmallPLUS_TIMES_Cint32_tMint32_tAint32_tBint32_tXint32_tYint32_tZint32_t (5 ms) +[----------] 4 tests from AxB_dot3_tests_PLUS_TIMES_4 (14 ms total) + +[----------] Global test environment tear-down +[==========] 16 tests from 4 test suites ran. (4206 ms total) +[ PASSED ] 16 tests. 
+Tests complete diff --git a/GraphBLAS/CUDA/test/run_tests.cpp b/GraphBLAS/CUDA/test/run_tests.cpp index 70d84dd4d5..55a666e865 100644 --- a/GraphBLAS/CUDA/test/run_tests.cpp +++ b/GraphBLAS/CUDA/test/run_tests.cpp @@ -25,9 +25,11 @@ int main(int argc, char **argv) { auto r = RUN_ALL_TESTS(); rmm_wrap_deallocate( p, buff_size); + GRB_TRY (GrB_finalize()); rmm_wrap_finalize(); + std::cout << "Tests complete" << std::endl; + - GRB_TRY (GrB_finalize()); return r; } diff --git a/GraphBLAS/CUDA/test/testGen_cmake.py b/GraphBLAS/CUDA/test/testGen_cmake.py index 1a9cece985..f48996869d 100644 --- a/GraphBLAS/CUDA/test/testGen_cmake.py +++ b/GraphBLAS/CUDA/test/testGen_cmake.py @@ -8,6 +8,8 @@ "uint32_t": "UINT32" } +DOT3_BUCKETS = [1, 5, 6, 7, 8, 9, 10, 11] + def std_type_to_gb_type(t): return SUPPORTED_TYPES[t] @@ -21,7 +23,7 @@ def build_gb_binop(t, b): gb_type = std_type_to_gb_type(t) return f"{GB_TYPE_PREFIX}_{b}_{gb_type}" -def buildTest(ts="TestsuiteName",kern="vsvs", ds= "tiny-tiny", SUM="PLUS", PRODUCT="TIMES",phase=3, +def buildTest(ts="TestsuiteName",kernels=DOT3_BUCKETS, ds= "tiny-tiny", SUM="PLUS", PRODUCT="TIMES",phase=3, typeC="int32_t",typeM="int32_t",typeA="int32_t",typeB="int32_t",type_x="int32_t",type_y="int32_t",type_z="int32_t"): # build string interpolation from pieces @@ -40,9 +42,9 @@ def buildTest(ts="TestsuiteName",kern="vsvs", ds= "tiny-tiny", SUM="PLUS", PRODU phase1_body= f""" test_AxB_phase1_factory< {typeC}, {typeM}, {typeA}, {typeB}>( 5, {N}, {Anz}, {Bnz}, monoid, binop);""" phase2_body= f""" test_AxB_phase2_factory< {typeC} >( 5, {N}, {Anz},{Bnz});""" - # phase2_end_body= f""" test_AxB_dot3_phase2end_factory< {typeC} >( 5, {N}, {Anz},{Bnz});""" - phase3_body = f""" test_AxB_dot3_full_factory< {typeC},{typeM},{typeA},{typeB},{type_x},{type_y},{type_z} > (5, {N}, {Anz}, {Bnz}, monoid, binop);""" - phasedict = { 1: phase1_body, 2: phase2_body, 3: phase3_body } + phase3_body = ''.join([f""" test_AxB_dot3_full_factory< 
{typeC},{typeM},{typeA},{typeB},{type_x},{type_y},{type_z} > ({kern}, {N}, {Anz}, {Bnz}, monoid, binop);\n""" for kern in kernels]) + reduce_body = f""" test_reduce_factory<{typeC}>({N}, monoid);""" + phasedict = { 1: phase1_body, 2: phase2_body, 3: phase3_body, 4: reduce_body } TEST_BODY= phasedict[phase] return TEST_HEAD,TEST_BODY, gb_monoid, gb_binop @@ -57,6 +59,7 @@ def load_types(argv): DataTypes = argv[6].split(";") # Hard-coding data shapes for now + DataShapes ={ "tinyxtiny": {'N':32, 'Anz':256, 'Bnz':128}, "smallxsmall": {'N':1024, 'Anz': 65_536, 'Bnz':65_536} @@ -69,12 +72,12 @@ def load_types(argv): return argv[1], test_suite_name, Monoids, Binops, Semirings, DataTypes, DataShapes, Kernels def write_test_instances_header(test_suite_name, Monoids, Binops, Semirings, DataTypes, DataShapes, Kernels): - outfile = f'{test_suite_name}_{Semirings}_{Kernels}_test_instances.hpp' + outfile = f'{test_suite_name}_{Semirings}_test_instances.hpp' with open(outfile, 'w') as fp: fp.write("#pragma once\n"); for m in Monoids: for b in Binops: - Test_suite = f'{test_suite_name}_tests_{m}_{b}_{Kernels}' + Test_suite = f'{test_suite_name}_tests_{m}_{b}' for dtC in DataTypes: dtX = dtC dtY = dtC @@ -83,7 +86,7 @@ def write_test_instances_header(test_suite_name, Monoids, Binops, Semirings, Dat for dtA in DataTypes: for dtB in DataTypes: for ds in DataShapes: - for phase in [1, 2, 3]: + for phase in [1, 2, 3, 4]: TEST_HEAD, TEST_BODY, gb_monoid, gb_binop = buildTest( Test_suite, Kernels, ds, m, b, phase, dtC, dtM, dtA, dtB, dtX, dtY, dtZ) @@ -95,12 +98,12 @@ def write_test_instances_header(test_suite_name, Monoids, Binops, Semirings, Dat def write_cuda_test(source_dir, test_suite_name, semiring, kernel): import shutil - shutil.copy(f"{source_dir}/test/cuda_tests_template.cpp", f"{test_suite_name}_{semiring}_{kernel}_cuda_tests.cu") + shutil.copy(f"{source_dir}/test/cuda_tests_template.cpp", f"{test_suite_name}_{semiring}_cuda_tests.cpp") - with 
open(f"{test_suite_name}_{semiring}_{kernel}_cuda_tests.cu", "a") as file_object: + with open(f"{test_suite_name}_{semiring}_cuda_tests.cpp", "a") as file_object: # Keeping this as a separate file for now to allow for further nesting # of test instances for each test_suite_name - file_object.write(f"\n#include \"{test_suite_name}_{semiring}_{kernel}_test_instances.hpp\"") + file_object.write(f"\n#include \"{test_suite_name}_{semiring}_test_instances.hpp\"") if __name__ == "__main__": import sys @@ -113,6 +116,6 @@ def write_cuda_test(source_dir, test_suite_name, semiring, kernel): """ source_dir, test_suite_name, Monoids, Binops, Semirings, DataTypes, DataShapes, Kernels = load_types(sys.argv) - write_test_instances_header(test_suite_name, Monoids, Binops, Semirings, DataTypes, DataShapes, Kernels) + write_test_instances_header(test_suite_name, Monoids, Binops, Semirings, DataTypes, DataShapes, DOT3_BUCKETS) write_cuda_test(source_dir, test_suite_name, Semirings, Kernels) diff --git a/GraphBLAS/CUDA/type_convert.hpp b/GraphBLAS/CUDA/type_convert.hpp deleted file mode 100644 index 880cd1acba..0000000000 --- a/GraphBLAS/CUDA/type_convert.hpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2019,2020 NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GB_CONV_TYPE_H -#define GB_CONV_TYPE_H -extern "C" { -#include "GB.h" -}; -#include - -/**---------------------------------------------------------------------------* - * @file type_convert.hpp - * @brief Defines the mapping between concrete C++ types and Grb types. - *---------------------------------------------------------------------------**/ -namespace cuda { - -template -GrB_Type to_grb_type(); - -template<> GrB_Type to_grb_type() { return GrB_INT8; } -template<> GrB_Type to_grb_type() { return GrB_INT16; } -template<> GrB_Type to_grb_type() { return GrB_INT32; } -template<> GrB_Type to_grb_type() { return GrB_INT64; } -template<> GrB_Type to_grb_type() { return GrB_UINT8; } -template<> GrB_Type to_grb_type() { return GrB_UINT16; } -template<> GrB_Type to_grb_type() { return GrB_UINT32; } -template<> GrB_Type to_grb_type() { return GrB_UINT64; } -template<> GrB_Type to_grb_type() { return GrB_FP32; } -template<> GrB_Type to_grb_type() { return GrB_FP64; } -template<> GrB_Type to_grb_type() { return GrB_BOOL; } - -template -void set_element(GrB_Matrix A, T x, int64_t i, int64_t j); - -template<> void set_element(GrB_Matrix A, int8_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_INT8(A, x, i, j); } -template<> void set_element(GrB_Matrix A, int16_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_INT16(A, x, i, j); } -template<> void set_element(GrB_Matrix A, int32_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_INT32(A, x, i, j); } -template<> void set_element(GrB_Matrix A, int64_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_INT64(A, x, i, j); } -template<> void set_element(GrB_Matrix A, uint8_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_UINT8(A, x, i, j); } -template<> void set_element(GrB_Matrix A, uint16_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_UINT16(A, x, i, j); } -template<> void set_element(GrB_Matrix A, uint32_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_UINT32(A, x, i, j); } -template<> void 
set_element(GrB_Matrix A, uint64_t x, int64_t i, int64_t j) { GrB_Matrix_setElement_UINT64(A, x, i, j); } -template<> void set_element(GrB_Matrix A, float x, int64_t i, int64_t j) { GrB_Matrix_setElement_FP32(A, x, i, j); } -template<> void set_element(GrB_Matrix A, double x, int64_t i, int64_t j) { GrB_Matrix_setElement_FP64(A, x, i, j); } -template<> void set_element(GrB_Matrix A, bool x, int64_t i, int64_t j) { GrB_Matrix_setElement_BOOL(A, x, i, j); } - -template -GrB_Info get_element(GrB_Matrix A, T* x, int64_t i, int64_t j); - -template<> GrB_Info get_element(GrB_Matrix A, int8_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_INT8(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, int16_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_INT16(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, int32_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_INT32(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, int64_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_INT64(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, uint8_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_UINT8(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, uint16_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_UINT16(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, uint32_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_UINT32(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, uint64_t *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_UINT64(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, float *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_FP32(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, double *x, int64_t i, int64_t j) { return GrB_Matrix_extractElement_FP64(x, A, i, j); } -template<> GrB_Info get_element(GrB_Matrix A, bool *x, int64_t i, int64_t j) { return 
GrB_Matrix_extractElement_BOOL(x, A, i, j); } - - -} // namespace cuda -#endif diff --git a/GraphBLAS/CUDA/type_name.hpp b/GraphBLAS/CUDA/type_name.hpp deleted file mode 100644 index 691f791768..0000000000 --- a/GraphBLAS/CUDA/type_name.hpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2019,2020 NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef GB_TYPE_NAME_H -#define GB_TYPE_NAME_H - -#include -#include -#include -#include -#include -#include - -/**---------------------------------------------------------------------------* - * @file type_name.hpp - * @brief Defines the mapping between concrete C++ types and strings. 
- *---------------------------------------------------------------------------**/ -namespace jit { - -template -class type_name { -public: - static const char *name; -}; - -#define DECLARE_TYPE_NAME(x) template<> inline const char *jit::type_name::name = #x; -#define GET_TYPE_NAME(x) (jit::type_name::name) - -DECLARE_TYPE_NAME(int); -DECLARE_TYPE_NAME(int&); -DECLARE_TYPE_NAME(int*); -DECLARE_TYPE_NAME(int8_t); -DECLARE_TYPE_NAME(int8_t&); -DECLARE_TYPE_NAME(int8_t*); -DECLARE_TYPE_NAME(unsigned char); -DECLARE_TYPE_NAME(unsigned char&); -DECLARE_TYPE_NAME(unsigned char*); -DECLARE_TYPE_NAME(unsigned int); -DECLARE_TYPE_NAME(unsigned int&); -DECLARE_TYPE_NAME(unsigned int*); -DECLARE_TYPE_NAME(unsigned int64_t); -DECLARE_TYPE_NAME(unsigned int64_t&); -DECLARE_TYPE_NAME(unsigned int64_t*); -DECLARE_TYPE_NAME(long); -DECLARE_TYPE_NAME(long&); -DECLARE_TYPE_NAME(long*); -DECLARE_TYPE_NAME(float); -DECLARE_TYPE_NAME(float&); -DECLARE_TYPE_NAME(float*); -DECLARE_TYPE_NAME(double); -DECLARE_TYPE_NAME(double&); -DECLARE_TYPE_NAME(double*); -DECLARE_TYPE_NAME(bool); - - - -} // namespace jit -#endif diff --git a/GraphBLAS/Config/GraphBLAS.h.in b/GraphBLAS/Config/GraphBLAS.h.in index b64b068df4..25ca36c7ad 100644 --- a/GraphBLAS/Config/GraphBLAS.h.in +++ b/GraphBLAS/Config/GraphBLAS.h.in @@ -285,7 +285,7 @@ typedef uint64_t GrB_Index ; // GxB_INDEX_MAX is historical; use GrB_INDEX_MAX+1 instead. It differs by one // from GrB_INDEX_MAX, since it defined the largest valid matrix or vector -// dimension. +// dimension. #define GxB_INDEX_MAX ((GrB_Index) (1ULL << 60)) //============================================================================== @@ -1148,18 +1148,18 @@ GB_PUBLIC GrB_BinaryOp // the same type. The value z is either 1 for true or 0 for false, but it // is a value with the same type as x and y. 
- // z = (x == y) z = (x != y) - GxB_ISEQ_BOOL, GxB_ISNE_BOOL, - GxB_ISEQ_INT8, GxB_ISNE_INT8, - GxB_ISEQ_INT16, GxB_ISNE_INT16, - GxB_ISEQ_INT32, GxB_ISNE_INT32, - GxB_ISEQ_INT64, GxB_ISNE_INT64, - GxB_ISEQ_UINT8, GxB_ISNE_UINT8, - GxB_ISEQ_UINT16, GxB_ISNE_UINT16, - GxB_ISEQ_UINT32, GxB_ISNE_UINT32, - GxB_ISEQ_UINT64, GxB_ISNE_UINT64, - GxB_ISEQ_FP32, GxB_ISNE_FP32, - GxB_ISEQ_FP64, GxB_ISNE_FP64, + // z = (x == y) z = (x != y) + GxB_ISEQ_BOOL, GxB_ISNE_BOOL, + GxB_ISEQ_INT8, GxB_ISNE_INT8, + GxB_ISEQ_INT16, GxB_ISNE_INT16, + GxB_ISEQ_INT32, GxB_ISNE_INT32, + GxB_ISEQ_INT64, GxB_ISNE_INT64, + GxB_ISEQ_UINT8, GxB_ISNE_UINT8, + GxB_ISEQ_UINT16, GxB_ISNE_UINT16, + GxB_ISEQ_UINT32, GxB_ISNE_UINT32, + GxB_ISEQ_UINT64, GxB_ISNE_UINT64, + GxB_ISEQ_FP32, GxB_ISNE_FP32, + GxB_ISEQ_FP64, GxB_ISNE_FP64, // complex: GxB_ISEQ_FC32, GxB_ISNE_FC32, GxB_ISEQ_FC64, GxB_ISNE_FC64, @@ -4251,16 +4251,26 @@ GrB_Info GxB_Matrix_split // split a matrix into 2D array of matrices // GxB_Matrix_diag, GxB_Vector_diag, GrB_Matrix_diag //------------------------------------------------------------------------------ -// GxB_Matrix_diag constructs a matrix from a vector. Let n be the length of -// the v vector, from GrB_Vector_size (&n, v). If k = 0, then C is an n-by-n -// diagonal matrix with the entries from v along the main diagonal of C, with -// C(i,i) = v(i). If k is nonzero, C is square with dimension n+abs(k). If k -// is positive, it denotes diagonals above the main diagonal, with C(i,i+k) = -// v(i). If k is negative, it denotes diagonals below the main diagonal of C, -// with C(i-k,i) = v(i). +// GrB_Matrix_diag constructs a new matrix from a vector. Let n be the length +// of the v vector, from GrB_Vector_size (&n, v). If k = 0, then C is an +// n-by-n diagonal matrix with the entries from v along the main diagonal of C, +// with C(i,i) = v(i). If k is nonzero, C is square with dimension n+abs(k). 
+// If k is positive, it denotes diagonals above the main diagonal, with +// C(i,i+k) = v(i). If k is negative, it denotes diagonals below the main +// diagonal of C, with C(i-k,i) = v(i). C is constructed with the same type +// as v. -// C must already exist on input, of the correct size. Any existing entries in -// C are discarded. The type of C is preserved, so that if the type of C and v +GB_PUBLIC +GrB_Info GrB_Matrix_diag // build a diagonal matrix from a vector +( + GrB_Matrix *C, // output matrix + const GrB_Vector v, // input vector + int64_t k +) ; + +// GxB_Matrix_diag is like GrB_Matrix_diag (&C, v, k), except that C must +// already exist on input, of the correct size. Any existing entries in C are +// discarded. The type of C is preserved, so that if the type of C and v // differ, the entries are typecasted into the type of C. Any settings made to // C by GxB_Matrix_Option_set (format by row or by column, bitmap switch, hyper // switch, and sparsity control) are unchanged. @@ -4274,22 +4284,11 @@ GrB_Info GxB_Matrix_diag // construct a diagonal matrix from a vector const GrB_Descriptor desc // to specify # of threads ) ; -// GrB_Matrix_diag is identical to GxB_Matrix_diag (C, v, k, NULL), -// using the default # of threads from the global setting. - -GB_PUBLIC -GrB_Info GrB_Matrix_diag // construct a diagonal matrix from a vector -( - GrB_Matrix C, // output matrix - const GrB_Vector v, // input vector - int64_t k -) ; - // GxB_Vector_diag extracts a vector v from an input matrix A, which may be // rectangular. If k = 0, the main diagonal of A is extracted; k > 0 denotes // diagonals above the main diagonal of A, and k < 0 denotes diagonals below // the main diagonal of A. Let A have dimension m-by-n. If k is in the range -// 0 to n-1, then v has length min(m,n-k). If k is negative and in the range +// 0 to n-1, then v has length min(m,n-k). If k is negative and in the range // -1 to -m+1, then v has length min(m+k,n).
If k is outside these ranges, // v has length 0 (this is not an error). @@ -5116,8 +5115,9 @@ GrB_Info GrB_Matrix_eWiseAdd_BinaryOp // C = accum (C, A+B) // GxB_eWiseUnion: a variant of GrB_eWiseAdd //============================================================================== -// GxB_eWiseUnion is a variant of eWiseAdd. They differ when an entry is -// present in A but not B, or in B but not A. +// GxB_eWiseUnion is a variant of eWiseAdd. The methods create a result with +// the same sparsity structure. They differ when an entry is present in A but +// not B, or in B but not A. // eWiseAdd does the following, for a matrix, where "+" is the add binary op: @@ -5128,7 +5128,7 @@ GrB_Info GrB_Matrix_eWiseAdd_BinaryOp // C = accum (C, A+B) // else if B(i,j) is present but not A(i,j) // C(i,j) = B(i,j) -// by constrast, eWiseUnion always applies the operator: +// by contrast, eWiseUnion always applies the operator: // if A(i,j) and B(i,j) are both present: // C(i,j) = A(i,j) + B(i,j) @@ -9268,8 +9268,8 @@ GB_PUBLIC GrB_Semiring // 64 bitwise semirings //------------------------------------------------------------------------------ - // monoids: (BOR, BAND, BXOR, BXNOR) x - // mult: (BOR, BAND, BXOR, BXNOR) x + // monoids: (BOR, BAND, BXOR, BXNOR) x + // mult: (BOR, BAND, BXOR, BXNOR) x // types: (UINT8, UINT16, UINT32, UINT64) GxB_BOR_BOR_UINT8 , GxB_BOR_BOR_UINT16 , GxB_BOR_BOR_UINT32 , GxB_BOR_BOR_UINT64 , @@ -9364,7 +9364,7 @@ GB_PUBLIC GrB_Semiring // MIN_PLUS, MIN_TIMES, MIN_FIRST, MIN_SECOND, MIN_MAX, // MAX_PLUS, MAX_TIMES, MAX_FIRST, MAX_SECOND, MAX_MIN -// and 4 semirings for boolean only: +// and 4 semirings for boolean only: // LOR_LAND, LAND_LOR, LXOR_LAND, LXNOR_LOR. 
@@ -9386,8 +9386,8 @@ GB_PUBLIC GrB_Semiring GrB_PLUS_TIMES_SEMIRING_UINT16, // GxB_PLUS_TIMES_UINT16 GrB_PLUS_TIMES_SEMIRING_UINT32, // GxB_PLUS_TIMES_UINT32 GrB_PLUS_TIMES_SEMIRING_UINT64, // GxB_PLUS_TIMES_UINT64 - GrB_PLUS_TIMES_SEMIRING_FP32, // GxB_PLUS_TIMES_FP32 - GrB_PLUS_TIMES_SEMIRING_FP64, // GxB_PLUS_TIMES_FP64 + GrB_PLUS_TIMES_SEMIRING_FP32, // GxB_PLUS_TIMES_FP32 + GrB_PLUS_TIMES_SEMIRING_FP64, // GxB_PLUS_TIMES_FP64 // PLUS_MIN semirings for all 10 real, non-boolean types: GrB_PLUS_MIN_SEMIRING_INT8, // GxB_PLUS_MIN_INT8 @@ -9398,8 +9398,8 @@ GB_PUBLIC GrB_Semiring GrB_PLUS_MIN_SEMIRING_UINT16, // GxB_PLUS_MIN_UINT16 GrB_PLUS_MIN_SEMIRING_UINT32, // GxB_PLUS_MIN_UINT32 GrB_PLUS_MIN_SEMIRING_UINT64, // GxB_PLUS_MIN_UINT64 - GrB_PLUS_MIN_SEMIRING_FP32, // GxB_PLUS_MIN_FP32 - GrB_PLUS_MIN_SEMIRING_FP64, // GxB_PLUS_MIN_FP64 + GrB_PLUS_MIN_SEMIRING_FP32, // GxB_PLUS_MIN_FP32 + GrB_PLUS_MIN_SEMIRING_FP64, // GxB_PLUS_MIN_FP64 //-------------------------------------------------------------------------- // 50 semirings with MIN monoids @@ -9414,8 +9414,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_PLUS_SEMIRING_UINT16, // GxB_MIN_PLUS_UINT16 GrB_MIN_PLUS_SEMIRING_UINT32, // GxB_MIN_PLUS_UINT32 GrB_MIN_PLUS_SEMIRING_UINT64, // GxB_MIN_PLUS_UINT64 - GrB_MIN_PLUS_SEMIRING_FP32, // GxB_MIN_PLUS_FP32 - GrB_MIN_PLUS_SEMIRING_FP64, // GxB_MIN_PLUS_FP64 + GrB_MIN_PLUS_SEMIRING_FP32, // GxB_MIN_PLUS_FP32 + GrB_MIN_PLUS_SEMIRING_FP64, // GxB_MIN_PLUS_FP64 // MIN_TIMES semirings for all 10 real, non-boolean types: GrB_MIN_TIMES_SEMIRING_INT8, // GxB_MIN_TIMES_INT8 @@ -9426,8 +9426,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_TIMES_SEMIRING_UINT16, // GxB_MIN_TIMES_UINT16 GrB_MIN_TIMES_SEMIRING_UINT32, // GxB_MIN_TIMES_UINT32 GrB_MIN_TIMES_SEMIRING_UINT64, // GxB_MIN_TIMES_UINT64 - GrB_MIN_TIMES_SEMIRING_FP32, // GxB_MIN_TIMES_FP32 - GrB_MIN_TIMES_SEMIRING_FP64, // GxB_MIN_TIMES_FP64 + GrB_MIN_TIMES_SEMIRING_FP32, // GxB_MIN_TIMES_FP32 + GrB_MIN_TIMES_SEMIRING_FP64, // 
GxB_MIN_TIMES_FP64 // MIN_FIRST semirings for all 10 real, non-boolean types: GrB_MIN_FIRST_SEMIRING_INT8, // GxB_MIN_FIRST_INT8 @@ -9438,8 +9438,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_FIRST_SEMIRING_UINT16, // GxB_MIN_FIRST_UINT16 GrB_MIN_FIRST_SEMIRING_UINT32, // GxB_MIN_FIRST_UINT32 GrB_MIN_FIRST_SEMIRING_UINT64, // GxB_MIN_FIRST_UINT64 - GrB_MIN_FIRST_SEMIRING_FP32, // GxB_MIN_FIRST_FP32 - GrB_MIN_FIRST_SEMIRING_FP64, // GxB_MIN_FIRST_FP64 + GrB_MIN_FIRST_SEMIRING_FP32, // GxB_MIN_FIRST_FP32 + GrB_MIN_FIRST_SEMIRING_FP64, // GxB_MIN_FIRST_FP64 // MIN_SECOND semirings for all 10 real, non-boolean types: GrB_MIN_SECOND_SEMIRING_INT8, // GxB_MIN_SECOND_INT8 @@ -9450,8 +9450,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_SECOND_SEMIRING_UINT16, // GxB_MIN_SECOND_UINT16 GrB_MIN_SECOND_SEMIRING_UINT32, // GxB_MIN_SECOND_UINT32 GrB_MIN_SECOND_SEMIRING_UINT64, // GxB_MIN_SECOND_UINT64 - GrB_MIN_SECOND_SEMIRING_FP32, // GxB_MIN_SECOND_FP32 - GrB_MIN_SECOND_SEMIRING_FP64, // GxB_MIN_SECOND_FP64 + GrB_MIN_SECOND_SEMIRING_FP32, // GxB_MIN_SECOND_FP32 + GrB_MIN_SECOND_SEMIRING_FP64, // GxB_MIN_SECOND_FP64 // MIN_MAX semirings for all 10 real, non-boolean types: GrB_MIN_MAX_SEMIRING_INT8, // GxB_MIN_MAX_INT8 @@ -9462,8 +9462,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_MAX_SEMIRING_UINT16, // GxB_MIN_MAX_UINT16 GrB_MIN_MAX_SEMIRING_UINT32, // GxB_MIN_MAX_UINT32 GrB_MIN_MAX_SEMIRING_UINT64, // GxB_MIN_MAX_UINT64 - GrB_MIN_MAX_SEMIRING_FP32, // GxB_MIN_MAX_FP32 - GrB_MIN_MAX_SEMIRING_FP64, // GxB_MIN_MAX_FP64 + GrB_MIN_MAX_SEMIRING_FP32, // GxB_MIN_MAX_FP32 + GrB_MIN_MAX_SEMIRING_FP64, // GxB_MIN_MAX_FP64 //-------------------------------------------------------------------------- // 50 semirings with MAX monoids @@ -9478,8 +9478,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_PLUS_SEMIRING_UINT16, // GxB_MAX_PLUS_UINT16 GrB_MAX_PLUS_SEMIRING_UINT32, // GxB_MAX_PLUS_UINT32 GrB_MAX_PLUS_SEMIRING_UINT64, // GxB_MAX_PLUS_UINT64 - GrB_MAX_PLUS_SEMIRING_FP32, // GxB_MAX_PLUS_FP32 - GrB_MAX_PLUS_SEMIRING_FP64, // 
GxB_MAX_PLUS_FP64 + GrB_MAX_PLUS_SEMIRING_FP32, // GxB_MAX_PLUS_FP32 + GrB_MAX_PLUS_SEMIRING_FP64, // GxB_MAX_PLUS_FP64 // MAX_TIMES semirings for all 10 real, non-boolean types: GrB_MAX_TIMES_SEMIRING_INT8, // GxB_MAX_TIMES_INT8 @@ -9490,8 +9490,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_TIMES_SEMIRING_UINT16, // GxB_MAX_TIMES_UINT16 GrB_MAX_TIMES_SEMIRING_UINT32, // GxB_MAX_TIMES_UINT32 GrB_MAX_TIMES_SEMIRING_UINT64, // GxB_MAX_TIMES_UINT64 - GrB_MAX_TIMES_SEMIRING_FP32, // GxB_MAX_TIMES_FP32 - GrB_MAX_TIMES_SEMIRING_FP64, // GxB_MAX_TIMES_FP64 + GrB_MAX_TIMES_SEMIRING_FP32, // GxB_MAX_TIMES_FP32 + GrB_MAX_TIMES_SEMIRING_FP64, // GxB_MAX_TIMES_FP64 // MAX_FIRST semirings for all 10 real, non-boolean types: GrB_MAX_FIRST_SEMIRING_INT8, // GxB_MAX_FIRST_INT8 @@ -9502,8 +9502,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_FIRST_SEMIRING_UINT16, // GxB_MAX_FIRST_UINT16 GrB_MAX_FIRST_SEMIRING_UINT32, // GxB_MAX_FIRST_UINT32 GrB_MAX_FIRST_SEMIRING_UINT64, // GxB_MAX_FIRST_UINT64 - GrB_MAX_FIRST_SEMIRING_FP32, // GxB_MAX_FIRST_FP32 - GrB_MAX_FIRST_SEMIRING_FP64, // GxB_MAX_FIRST_FP64 + GrB_MAX_FIRST_SEMIRING_FP32, // GxB_MAX_FIRST_FP32 + GrB_MAX_FIRST_SEMIRING_FP64, // GxB_MAX_FIRST_FP64 // MAX_SECOND semirings for all 10 real, non-boolean types: GrB_MAX_SECOND_SEMIRING_INT8, // GxB_MAX_SECOND_INT8 @@ -9514,8 +9514,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_SECOND_SEMIRING_UINT16, // GxB_MAX_SECOND_UINT16 GrB_MAX_SECOND_SEMIRING_UINT32, // GxB_MAX_SECOND_UINT32 GrB_MAX_SECOND_SEMIRING_UINT64, // GxB_MAX_SECOND_UINT64 - GrB_MAX_SECOND_SEMIRING_FP32, // GxB_MAX_SECOND_FP32 - GrB_MAX_SECOND_SEMIRING_FP64, // GxB_MAX_SECOND_FP64 + GrB_MAX_SECOND_SEMIRING_FP32, // GxB_MAX_SECOND_FP32 + GrB_MAX_SECOND_SEMIRING_FP64, // GxB_MAX_SECOND_FP64 // MAX_MIN semirings for all 10 real, non-boolean types: GrB_MAX_MIN_SEMIRING_INT8, // GxB_MAX_MIN_INT8 @@ -9526,8 +9526,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_MIN_SEMIRING_UINT16, // GxB_MAX_MIN_UINT16 GrB_MAX_MIN_SEMIRING_UINT32, // GxB_MAX_MIN_UINT32 
GrB_MAX_MIN_SEMIRING_UINT64, // GxB_MAX_MIN_UINT64 - GrB_MAX_MIN_SEMIRING_FP32, // GxB_MAX_MIN_FP32 - GrB_MAX_MIN_SEMIRING_FP64, // GxB_MAX_MIN_FP64 + GrB_MAX_MIN_SEMIRING_FP32, // GxB_MAX_MIN_FP32 + GrB_MAX_MIN_SEMIRING_FP64, // GxB_MAX_MIN_FP64 //-------------------------------------------------------------------------- // 4 boolean semirings: @@ -11292,7 +11292,8 @@ GrB_Info GrB_Matrix_exportHint // suggest the best export format FILE *f = fopen ("myblob", "r") ; fread (&blob_size, sizeof (size_t), 1, f) ; blob = malloc (blob_size) ; - fread (&blob, sizeof (uint8_t), 1, f) ; + fread (blob, sizeof (uint8_t), blob_size, f) ; + fclose (f) ; char type_name [GxB_MAX_NAME_LEN] ; GxB_deserialize_type_name (type_name, blob, blob_size) ; printf ("blob type is: %s\n", type_name) ; @@ -11331,7 +11332,8 @@ GrB_Info GrB_Matrix_exportHint // suggest the best export format FILE *f = fopen ("myblob", "r") ; fread (&blob_size, sizeof (size_t), 1, f) ; blob = malloc (blob_size) ; - fread (&blob, sizeof (uint8_t), 1, f) ; + fread (blob, sizeof (uint8_t), blob_size, f) ; + fclose (f) ; // the user must know the type of A is MyQType GrB_Matrix_deserialize (&A, MyQtype, blob, blob_size) ; free (blob) ; @@ -11634,7 +11636,7 @@ struct GB_Iterator_opaque // only changes when the iterator is created: size_t header_size ; // size of this iterator object - // these components only change when the iterator is attached: + // these components only change when the iterator is attached: int64_t pmax ; // avlen*avdim for bitmap; nvals(A) otherwise int64_t avlen ; // length of each vector in the matrix int64_t avdim ; // number of vectors in the matrix dimension @@ -11895,7 +11897,7 @@ GrB_Index GxB_rowIterator_kount (GxB_Iterator iterator) ; // For SuiteSparse:GraphBLAS: If the matrix is hypersparse, and the row // does not appear in the hyperlist, then the iterator is moved to the first -// row after the given row that does appear in the hyperlist. 
+// row after the given row that does appear in the hyperlist. // The method is always successful; the following are conditions are returned: // GxB_EXHAUSTED: if the row index is >= nrows(A); the row iterator is @@ -12223,7 +12225,7 @@ GrB_Info GxB_Matrix_Iterator_next (GxB_Iterator iterator) ; // GxB_Matrix_Iterator_next. Results are undefined if these conditions are not // met. -GB_PUBLIC +GB_PUBLIC GrB_Index GxB_Matrix_Iterator_getp (GxB_Iterator iterator) ; //------------------------------------------------------------------------------ @@ -12236,7 +12238,7 @@ GrB_Index GxB_Matrix_Iterator_getp (GxB_Iterator iterator) ; // GxB_Matrix_Iterator_next, with a return value of GrB_SUCCESS. Results are // undefined if these conditions are not met. -GB_PUBLIC +GB_PUBLIC void GxB_Matrix_Iterator_getIndex ( GxB_Iterator iterator, @@ -12341,7 +12343,8 @@ GrB_Index GxB_Vector_Iterator_getpmax (GxB_Iterator iterator) ; // vector, or GxB_EXHAUSTED if the iterator is exhausted. GB_PUBLIC -GrB_Info GB_Vector_Iterator_bitmap_seek (GxB_Iterator iterator, GrB_Index p) ; +GrB_Info GB_Vector_Iterator_bitmap_seek (GxB_Iterator iterator, + GrB_Index unused) ; // unused parameter to be removed in v8.x GB_PUBLIC GrB_Info GxB_Vector_Iterator_seek (GxB_Iterator iterator, GrB_Index p) ; @@ -12360,7 +12363,7 @@ GrB_Info GxB_Vector_Iterator_seek (GxB_Iterator iterator, GrB_Index p) ; iterator->p = q, \ (iterator->A_sparsity == GxB_BITMAP) ? \ ( \ - GB_Vector_Iterator_bitmap_seek (iterator, q) \ + GB_Vector_Iterator_bitmap_seek (iterator, 0) \ ) \ : \ ( \ @@ -12401,7 +12404,16 @@ GrB_Info GxB_Vector_Iterator_next (GxB_Iterator iterator) ; ) \ : \ ( \ - GrB_SUCCESS \ + (iterator->A_sparsity == GxB_BITMAP) ? 
\ + ( \ + /* bitmap: seek to the next entry present in the bitmap */ \ + GB_Vector_Iterator_bitmap_seek (iterator, 0) \ + ) \ + : \ + ( \ + /* other formats: already at the next entry */ \ + GrB_SUCCESS \ + ) \ ) \ ) @@ -12515,5 +12527,47 @@ GB_PUBLIC void GxB_Iterator_get_UDT (GxB_Iterator iterator, (iterator)->type_size) \ ) +//------------------------------------------------------------------------------ +// Rapids Memory Manager wrappers for SuiteSparse:GraphBLAS +//------------------------------------------------------------------------------ + +#ifndef RMM_WRAP_H +#define RMM_WRAP_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// TODO describe the modes +typedef enum { rmm_wrap_host=0, rmm_wrap_host_pinned=1, rmm_wrap_device=2, rmm_wrap_managed=3 } RMM_MODE ; + +void rmm_wrap_finalize (void) ; +int rmm_wrap_initialize (RMM_MODE mode, size_t init_pool_size, size_t max_pool_size) ; + +// example usage: + // rmm_wrap_initialize (rmm_wrap_managed, INT32_MAX, INT64_MAX) ; + // GxB_init (GrB_NONBLOCKING, rmm_wrap_malloc, rmm_wrap_calloc, rmm_wrap_realloc, rmm_wrap_free) ; + // use GraphBLAS ... 
+ // GrB_finalize ( ) ; + // rmm_wrap_finalize ( ) ; + +// The two PMR-based allocate/deallocate signatures (C-style): +void *rmm_wrap_allocate (size_t *size) ; +void rmm_wrap_deallocate (void *p, size_t size) ; + +// The four malloc/calloc/realloc/free signatures: +void *rmm_wrap_malloc (size_t size) ; +void *rmm_wrap_calloc (size_t n, size_t size) ; +void *rmm_wrap_realloc (void *p, size_t newsize) ; +void rmm_wrap_free (void *p) ; + +#ifdef __cplusplus +} +#endif +#endif + #endif diff --git a/GraphBLAS/Config/README.md.in b/GraphBLAS/Config/README.md.in index f0c1297082..4885666319 100644 --- a/GraphBLAS/Config/README.md.in +++ b/GraphBLAS/Config/README.md.in @@ -55,9 +55,9 @@ To remove all compiled files: make clean -To compile the demos: +To compile and run the demos: - make all + make run See the GraphBLAS/ subfolder for the Octave/MATLAB interface, which contains a README.md file with further details. diff --git a/GraphBLAS/Demo/Program/wathen_demo.c b/GraphBLAS/Demo/Program/wathen_demo.c new file mode 100644 index 0000000000..a40dd7f540 --- /dev/null +++ b/GraphBLAS/Demo/Program/wathen_demo.c @@ -0,0 +1,241 @@ +//------------------------------------------------------------------------------ +// GraphBLAS/Demo/Program/wathen_demo.c: test wathen +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//------------------------------------------------------------------------------ + +// Construct a matrix using the Demo/Source/wathen.c method. +// +// wathen_demo nx ny method nthreads + +// macro used by OK(...) 
to free workspace if an error occurs +#define FREE_ALL \ + GrB_Matrix_free (&A) ; \ + +#include "graphblas_demos.h" +#ifdef _OPENMP +#include "omp.h" +#endif + +int main (int argc, char **argv) +{ + GrB_Matrix A = NULL ; + GrB_Info info ; + OK (GrB_init (GrB_NONBLOCKING)) ; + + //-------------------------------------------------------------------------- + // get inputs + //-------------------------------------------------------------------------- + + int64_t nx = 10, ny = 10 ; + int method = 0 ; + int nthreads ; + if (argc > 1) nx = strtol (argv [1], NULL, 0) ; + if (argc > 2) ny = strtol (argv [2], NULL, 0) ; + if (argc > 3) method = strtol (argv [3], NULL, 0) ; + if (argc > 4) + { + nthreads = strtol (argv [4], NULL, 0) ; + OK (GxB_Global_Option_set (GxB_GLOBAL_NTHREADS, nthreads)) ; + } + OK (GxB_Global_Option_get (GxB_GLOBAL_NTHREADS, &nthreads)) ; + + fprintf (stderr, "Wathen: nx %ld ny %ld method: %d nthreads: %d ", + nx, ny, method, nthreads) ; + + //-------------------------------------------------------------------------- + // create a Wathen matrix + //-------------------------------------------------------------------------- + + #ifdef _OPENMP + double t = omp_get_wtime ( ) ; + #endif + OK (wathen (&A, nx, ny, false, method, NULL)) ; + #ifdef _OPENMP + t = omp_get_wtime ( ) - t ; + fprintf (stderr, "time: %g", t) ; + #endif + fprintf (stderr, "\n") ; + + OK (GxB_print (A, GxB_SUMMARY)) ; + + FREE_ALL ; + +#if 0 + + //-------------------------------------------------------------------------- + // benchmark Wathen matrices (for ACM TOMS submission) + //-------------------------------------------------------------------------- + + GrB_Descriptor Desc_Gustavson = NULL, Desc_Hash = NULL ; + GrB_Descriptor_new (&Desc_Gustavson) ; + GrB_Descriptor_new (&Desc_Hash) ; + GxB_Desc_set (Desc_Gustavson, GxB_AxB_METHOD, GxB_AxB_GUSTAVSON) ; + GxB_Desc_set (Desc_Hash , GxB_AxB_METHOD, GxB_AxB_HASH ) ; + + for (nx = 100 ; nx <= 2200 ; nx += 100) + { + printf ("\n 
------------------- nx %ld\n", nx) ; + GxB_set (GxB_NTHREADS, 40) ; + + // create the wathen matrix + t = omp_get_wtime ( ) ; + OK (wathen (&A, nx, nx, false, 0, NULL)) ; + t = omp_get_wtime ( ) - t ; + GrB_Index nvals1, nvals2, n ; + OK (GrB_Matrix_nrows (&n, A)) ; + OK (GrB_Matrix_nvals (&nvals1, A)) ; + double t2_sequential, t4_sequential, t8_sequential ; + + printf ("\n=================================\n" + "nx %5ld n %10.3fM nvals %10.3fM create: %10.3f sec\n", + nx, (double) n / 1e6, (double) nvals1 / 1e6, t) ; + + double T2 [3][7], T4 [3][7], T8 [3][7] ; + double T2best [7], T4best [7], T8best [7] ; + + for (int algo = 0 ; algo <= 2 ; algo++) + // for (int algo = 0 ; algo <= 1 ; algo++) + { + GrB_Descriptor desc = NULL ; + if (algo == 1) desc = Desc_Gustavson ; + if (algo == 2) desc = Desc_Hash ; + + int Nthreads [7] = {1, 2, 4, 8, 16, 20, 40} ; + for (int k = 0 ; k < 7 ; k++) + { + // set the # of threads to use + int nth = Nthreads [k] ; + GxB_set (GxB_NTHREADS, nth) ; + + GrB_Matrix C = NULL ; + OK (GrB_Matrix_new (&C, GrB_FP64, n, n)) ; + char *algo_name = (algo == 0) ? "Auto" : + ((algo == 1) ? 
"Gustavson" : "Hash") ; + printf ("\nalgo: %s nthreads: %d\n", algo_name, nth) ; + // if (nth == 2 || nth == 40) + GxB_set (GxB_BURBLE, true) ; + + // square it: C = A*A + double t2 = omp_get_wtime ( ) ; + OK (GrB_mxm (C, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP64, + A, A, desc)) ; + t2 = omp_get_wtime ( ) - t2 ; + GxB_set (GxB_BURBLE, false) ; + OK (GrB_Matrix_nvals (&nvals2, C)) ; + GxB_set (GxB_BURBLE, true) ; + printf ("C=A^2 threads: %2d mxm: %10.3f nvals %10.3fM ", + nth, t2, ((double) nvals2) / 1e6) ; + if (nth == 1) t2_sequential = t2 ; + printf ("speedup: %g\n", t2_sequential/t2) ; + T2 [algo][k] = t2 ; + + // square it again: C = C*C to get A^4 + // if (nx <= 4000) + { + double t4 = omp_get_wtime ( ) ; + OK (GrB_mxm (C, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP64, + C, C, desc)) ; + t4 = omp_get_wtime ( ) - t4 ; + GxB_set (GxB_BURBLE, false) ; + OK (GrB_Matrix_nvals (&nvals2, C)) ; + GxB_set (GxB_BURBLE, true) ; + printf ("C=A^4 threads: %2d mxm: %10.3f nvals %10.3fM ", + nth, t4, ((double) nvals2) / 1e6) ; + if (nth == 1) t4_sequential = t4 ; + printf ("speedup: %g\n", t4_sequential/t4) ; + T4 [algo][k] = t4 ; + } + + // square it again: C = C*C to get A^8 + // if (nx <= 1000) + { + double t8 = omp_get_wtime ( ) ; + OK (GrB_mxm (C, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP64, + C, C, desc)) ; + t8 = omp_get_wtime ( ) - t8 ; + GxB_set (GxB_BURBLE, false) ; + OK (GrB_Matrix_nvals (&nvals2, C)) ; + GxB_set (GxB_BURBLE, true) ; + printf ("C=A^8 threads: %2d mxm: %10.3f nvals %10.3fM ", + nth, t8, ((double) nvals2) / 1e6) ; + if (nth == 1) t8_sequential = t8 ; + printf ("speedup: %g\n", t8_sequential/t8) ; + T8 [algo][k] = t8 ; + } + + GxB_set (GxB_BURBLE, false) ; + GrB_Matrix_free (&C) ; + } + } + GrB_Matrix_free (&A) ; + + printf ("\nSummary:\n") ; + for (int algo = 0 ; algo <= 2 ; algo++) + { + char *algo_name = (algo == 0) ? "Auto" : ((algo == 1) ? 
"Gus " : "Hash") ; + printf ("algo %s : ", algo_name) ; + printf ("| T2: ") ; + for (int k = 0 ; k < 7 ; k++) + { + printf ("%10.2f ", T2 [algo][k]) ; + } + printf ("| T4: ") ; + for (int k = 0 ; k < 7 ; k++) + { + printf ("%10.2f ", T4 [algo][k]) ; + } + printf ("| T8: ") ; + for (int k = 0 ; k < 7 ; k++) + { + printf ("%10.2f ", T8 [algo][k]) ; + } + printf ("\n") ; + } + + for (int k = 0 ; k < 7 ; k++) + { + T2best [k] = fmin (fmin (T2 [0][k], T2 [1][k]), T2 [2][k]) ; + T4best [k] = fmin (fmin (T4 [0][k], T4 [1][k]), T4 [2][k]) ; + T8best [k] = fmin (fmin (T8 [0][k], T8 [1][k]), T8 [2][k]) ; + } + + printf ("\nRelative:\n") ; + for (int algo = 0 ; algo <= 2 ; algo++) + { + char *algo_name = (algo == 0) ? "Auto" : ((algo == 1) ? "Gus " : "Hash") ; + printf ("algo %s : ", algo_name) ; + printf ("| T2: ") ; + for (int k = 0 ; k < 7 ; k++) + { + if (T2 [algo][k] == T2best [k]) printf (" 1 ") ; + else printf ("%10.2f ", T2 [algo][k] / T2best [k]) ; + } + printf ("| T4: ") ; + for (int k = 0 ; k < 7 ; k++) + { + if (T4 [algo][k] == T4best [k]) printf (" 1 ") ; + else printf ("%10.2f ", T4 [algo][k] / T4best [k]) ; + } + printf ("| T8: ") ; + for (int k = 0 ; k < 7 ; k++) + { + if (T8 [algo][k] == T8best [k]) printf (" 1 ") ; + else printf ("%10.2f ", T8 [algo][k] / T8best [k]) ; + } + printf ("\n") ; + } + + + } + + GrB_free (&Desc_Gustavson) ; + GrB_free (&Desc_Hash) ; +#endif + + OK (GrB_finalize ( )) ; +} + diff --git a/GraphBLAS/Demo/Source/wathen.c b/GraphBLAS/Demo/Source/wathen.c index af256da842..90abd1ded2 100644 --- a/GraphBLAS/Demo/Source/wathen.c +++ b/GraphBLAS/Demo/Source/wathen.c @@ -143,12 +143,12 @@ GrB_Info wathen // construct a random Wathen matrix case 0: { - // This method is fastest, but only 20% faster than methods 2 and - // 3. It is about 15% to 20% faster than the wathen.m function, - // and uses the identical algorithm. 
The code here is nearly - // identical to the wathen.m M-file, except that here an adjustment - // to the indices must be made since GraphBLAS matrices are indexed - // starting at row and column 0, not 1. + // This method is fastest of the 4 methods here. The code here is + // nearly identical to the wathen.m M-file, except that here an + // adjustment to the indices must be made since GraphBLAS matrices + // are indexed starting at row and column 0, not 1. It requires + // more code on the part of the user application, however, as + // compared to methods 1, 2, and 3. // allocate the tuples int64_t ntriplets = nx*ny*64 ; @@ -202,27 +202,8 @@ GrB_Info wathen // construct a random Wathen matrix case 1: { - - // This method takes about 1.8x the time as other three methods, - // for both small and large problems. The difference in - // performance is likely because GrB_Matrix_assign_FP64 is - // expecting to write its double scalar to a submatrix of A, not a - // single scalar. It has some extra overhead as a result, which is - // not needed. GrB_Matrix_setElement cannot be used because that - // method does not allow for an accumulator function to be - // specified; its implicit accum operator is SECOND, not PLUS. - // Future versions of SuiteSparse:GraphBLAS may correct this - // performance discrepancy, so that this method is just as fast as - // the other three methods here. - - // This method is the same as the older version of wathen.m, before - // it was updated to use the sparse function. That older wathen.m - // function was asymptotically slower, and 300x slower in practice - // for moderate sized problems. The performance difference - // increases greatly as the problem gets larger, as well. By - // contrast, this method is asympotically just as fast as the other - // methods here, it's just a constant times slower (by a typical - // factor of just under 2). + // This method is the simplest, and only takes about 2x the time as + // method 0. 
It would be impossibly slow in the equivalent MATLAB. for (int j = 1 ; j <= ny ; j++) { @@ -259,11 +240,8 @@ GrB_Info wathen // construct a random Wathen matrix case 2: { - // This method is about 20% slower than method 0, but it has the - // advantage of not requiring the number of tuples to be known in - // advance. Method 3 is just as fast as this method. This method - // is typically about 5% to 10% slower than wathen.m regardless of - // the problem size. + // This method constructs F and then assigns it all at once into A. + // It is about 2x to 3x slower than method 1. // create a single 8-by-8 finite-element matrix F OK (GrB_Matrix_new (&F, GrB_FP64, 8, 8)) ; @@ -306,11 +284,10 @@ GrB_Info wathen // construct a random Wathen matrix case 3: { - // This method is as fast as method 2. It is very flexible since - // any method can be used to construct the finite-element matrix. - // Then A(nn,nn)+=F is very efficient when F is a matrix. This - // method is typically about 5% to 10% slower than wathen.m - // regardless of the problem size. + // This method is as fast as method 2 (that is, 2x to 3x slower + // than method 1). It is very flexible since any method can be + // used to construct the finite-element matrix. Then A(nn,nn)+=F + // is very efficient when F is a matrix. // create a single 8-by-8 finite-element matrix F OK (GrB_Matrix_new (&F, GrB_FP64, 8, 8)) ; @@ -390,8 +367,7 @@ GrB_Info wathen // construct a random Wathen matrix } // force completion - GrB_Index nvals ; - OK (GrB_Matrix_nvals (&nvals, A)) ; + OK (GrB_Matrix_wait (A, GrB_MATERIALIZE)) ; //-------------------------------------------------------------------------- // free workspace and return the result diff --git a/GraphBLAS/Demo/wdemo b/GraphBLAS/Demo/wdemo new file mode 100755 index 0000000000..c505da75e2 --- /dev/null +++ b/GraphBLAS/Demo/wdemo @@ -0,0 +1,38 @@ +#!/bin/sh + +# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +../build/wathen_demo + +../build/wathen_demo 100 100 0 1 +../build/wathen_demo 100 100 0 8 + +../build/wathen_demo 100 100 1 1 +../build/wathen_demo 100 100 1 8 + +../build/wathen_demo 100 100 2 1 +../build/wathen_demo 100 100 2 8 + +../build/wathen_demo 100 100 3 1 +../build/wathen_demo 100 100 3 8 + +../build/wathen_demo 200 200 0 1 +../build/wathen_demo 200 200 0 8 + +../build/wathen_demo 200 200 1 1 +../build/wathen_demo 200 200 1 8 + +../build/wathen_demo 200 200 2 1 +../build/wathen_demo 200 200 2 8 + +../build/wathen_demo 200 200 3 1 +../build/wathen_demo 200 200 3 8 + +../build/wathen_demo 400 400 0 1 +../build/wathen_demo 400 400 0 8 + +../build/wathen_demo 400 400 1 1 +../build/wathen_demo 400 400 1 8 + +exit 0 diff --git a/GraphBLAS/Doc/ChangeLog b/GraphBLAS/Doc/ChangeLog index 21e3aef22b..878af3a116 100644 --- a/GraphBLAS/Doc/ChangeLog +++ b/GraphBLAS/Doc/ChangeLog @@ -1,3 +1,32 @@ +Version 7.0.3, Apr 8, 2022 + + * faster transpose when using 2 threads + +Version 7.0.2, Apr 6, 2022 + + * (45) bug fix: vector iterator was broken for iterating across a + vector in bitmap format. Caught by Erik Welch. + +Version 7.0.1, Apr 3, 2022 + + * revised ACM TOMS submission: Doc/toms_parallel_grb2.pdf + +Version 7.0.0, Apr 2, 2022 + + * (44) spec bug: GrB_Matrix_diag was implemented in v5.2.x and v6.x with + the wrong signature. This fix requires the major release to change, + from v6.x to v7.x, since the change means that the revised v7 is not + backward compatible with v6. + * performance for GrB_mxm: auto selection for saxpy method (Hash vs + Gustavson) revised. + * (43) performance bug fix for GrB_assign: better performance for + C(i,j)=scalar and C(i,j)+=scalar when i and j have length 1 (scalar + assignment with no scalar expansion). The prior code worked but this is + a performance bug introduced in the parallel GraphBLAS. The bug likely + doesn't appear in v2.x (the sequential version of SS:GrB).
This + affects method 1 of Demo/Source/wathen.c. Caught by a reviewer of the + ACM TOMS paper on the parallel GraphBLAS. + Version 6.2.5, Mar 14, 2022 * For SuiteSparse v5.11.0 @@ -5,7 +34,7 @@ Version 6.2.5, Mar 14, 2022 Version 6.2.4, Mar 8, 2022 * (42) bug fix: GrB_mxm with 0-by-0 iso full matrices: Caught by - @ParticularMiner in the Python grblas interface, then triaged and + Henry Amuasi in the Python grblas interface, then triaged and isolated by Erik Welch. Version 6.2.3, Mar 5, 2022 diff --git a/GraphBLAS/Doc/GraphBLAS_UserGuide.bib b/GraphBLAS/Doc/GraphBLAS_UserGuide.bib index 194199f857..78012057d5 100644 --- a/GraphBLAS/Doc/GraphBLAS_UserGuide.bib +++ b/GraphBLAS/Doc/GraphBLAS_UserGuide.bib @@ -159,12 +159,12 @@ @article{Davis19 keywords = {sparse matrices, GraphBLAS, Graph algorithms} } -@article{Davis21, +@article{Davis22, author = {Davis, Timothy A.}, title = {Algorithm 10xx: {SuiteSparse:GraphBLAS}: Parallel Graph Algorithms in the Language of Sparse Linear Algebra}, journal = {ACM Trans. Math. Softw.}, -year = {2021}, -annote = {(submitted)} +year = {2022}, +annote = {(submitted, revised Apr 3, 2022)} } diff --git a/GraphBLAS/Doc/GraphBLAS_UserGuide.pdf b/GraphBLAS/Doc/GraphBLAS_UserGuide.pdf index 3447ec2fdf..3574e76c3d 100644 Binary files a/GraphBLAS/Doc/GraphBLAS_UserGuide.pdf and b/GraphBLAS/Doc/GraphBLAS_UserGuide.pdf differ diff --git a/GraphBLAS/Doc/GraphBLAS_UserGuide.tex b/GraphBLAS/Doc/GraphBLAS_UserGuide.tex index 09868a0100..01e4b266d0 100644 --- a/GraphBLAS/Doc/GraphBLAS_UserGuide.tex +++ b/GraphBLAS/Doc/GraphBLAS_UserGuide.tex @@ -115,7 +115,7 @@ \section{Introduction} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% GraphBLAS library. For more details on SuiteSparse:GraphBLAS, and its use in LAGraph, see -\cite{Davis19,Davis21,Davis18b,DavisAznavehKolodziej19,Davis20,Mattson19}. +\cite{Davis19,Davis22,Davis18b,DavisAznavehKolodziej19,Davis20,Mattson19}. 
A full and precise definition of the GraphBLAS specification is provided in {\em The GraphBLAS C API Specification} by {Ayd\i n Bulu\c{c}, Timothy Mattson, @@ -148,6 +148,38 @@ \subsection{Release Notes} \begin{itemize} +\item Version 7.0.3 (Apr 8, 2022) + + \begin{packed_itemize} + \item faster transpose when using 2 threads + \end{packed_itemize} + +\item Version 7.0.2 (Apr 5, 2022) + + \begin{packed_itemize} + \item (45) bug fix: vector iterator was broken for iterating across a + vector in bitmap format. Caught by Erik Welch. + \end{packed_itemize} + +\item Version 7.0.1 (Apr 3, 2022) + + \begin{packed_itemize} + \item added revised ACM TOMS submission to the Doc folder + \end{packed_itemize} + +\item Version 7.0.0 (Apr 2, 2022) + + \begin{packed_itemize} + \item (44) spec bug: \verb'GrB_Matrix_diag' + was implemented in v5.2.x and v6.x with the wrong signature. + This fix requires the major release to change, from v6.x to v7.x. + \item performance for \verb'GrB_mxm': + auto selection for saxpy method (Hash vs Gustavson) revised. + \item (43) performance bug fix for \verb'GrB_assign': better performance for \verb'C(i,j)=scalar' and + \verb'C(i,j)+=scalar' when \verb'i' and \verb'j' have length 1 (scalar + assignment with no scalar expansion). + \end{packed_itemize} + \item Version 6.2.5 (Mar 14, 2022) \begin{packed_itemize} @@ -158,7 +190,7 @@ \subsection{Release Notes} \begin{packed_itemize} \item (42) bug fix: \verb'GrB_mxm' with 0-by-0 iso full matrices. - Caught by \verb'@ParticularMiner' in the Python + Caught by Henry Amuasi in the Python grblas interface, then triaged and isolated by Erik Welch.
\end{packed_itemize} @@ -5745,7 +5777,7 @@ \subsubsection{{\sf GrB\_Matrix\_diag:} construct a diagonal matrix} \begin{verbatim} GrB_Info GrB_Matrix_diag // construct a diagonal matrix from a vector ( - GrB_Matrix C, // output matrix + GrB_Matrix *C, // output matrix const GrB_Vector v, // input vector int64_t k ) ; @@ -5760,26 +5792,21 @@ \subsubsection{{\sf GrB\_Matrix\_diag:} construct a diagonal matrix} \verb'C(i,i+k)=v(i)'. If \verb'k' is negative, it denotes diagonals below the main diagonal of \verb'C', with \verb'C(i-k,i)=v(i)'. This behavior is identical to the MATLAB -statement \verb'C=diag(v,k)', where \verb'v' is a vector, except that -\verb'GrB_Matrix_diag' can also do typecasting. +statement \verb'C=diag(v,k)', where \verb'v' is a vector. -\verb'C' must already exist on input, of the correct size. Any existing -entries in \verb'C' are discarded. The type of \verb'C' is preserved, so that -if the type of \verb'C' and \verb'v' differ, the entries are typecasted into -the type of \verb'C'. Any settings made to \verb'C' by -\verb'GxB_Matrix_Option_set' (format by row or by column, bitmap switch, hyper -switch, and sparsity control) are unchanged. +The output matrix \verb'C' is a newly-constructed square matrix with the +same type as the input vector \verb'v'. No typecasting is performed. 
\newpage %------------------------------------------------------------------------------- -\subsubsection{{\sf GxB\_Matrix\_diag:} construct a diagonal matrix} +\subsubsection{{\sf GxB\_Matrix\_diag:} build a diagonal matrix} %------------------------------------------------------------------------------- \label{matrix_diag_GxB} \begin{mdframed}[userdefinedwidth=6in] {\footnotesize \begin{verbatim} -GrB_Info GxB_Matrix_diag // construct a diagonal matrix from a vector +GrB_Info GxB_Matrix_diag // build a diagonal matrix from a vector ( GrB_Matrix C, // output matrix const GrB_Vector v, // input vector @@ -5788,8 +5815,17 @@ \subsubsection{{\sf GxB\_Matrix\_diag:} construct a diagonal matrix} ) ; \end{verbatim} } \end{mdframed} -Identical to \verb'GrB_Matrix_diag', except for the extra parameter: -a \verb'descriptor' to provide control over the number of threads used. +Identical to \verb'GrB_Matrix_diag', except for the extra parameter +(a \verb'descriptor' to provide control over the number of threads used), +and this method is not a constructor. + +The matrix \verb'C' must already exist on input, of the correct size. It must +be square of dimension $n+|k|$ where the vector \verb'v' has length $n$. Any +existing entries in \verb'C' are discarded. The type of \verb'C' is preserved, +so that if the type of \verb'C' and \verb'v' differ, the entries are typecasted +into the type of \verb'C'. Any settings made to \verb'C' by +\verb'GxB_Matrix_Option_set' (format by row or by column, bitmap switch, hyper +switch, and sparsity control) are unchanged. %------------------------------------------------------------------------------- \subsubsection{{\sf GxB\_Matrix\_iso:} query iso status of a matrix} @@ -5904,8 +5940,8 @@ \subsection{Serialize/deserialize methods} \url{https://cwe.mitre.org/data/definitions/502.html}. 
The deserialization methods do a few basic checks so that no out-of-bounds access occurs during deserialization, but the output matrix or vector itself may still be corrupted. -If the data is untrusted, use check the matrix or vector after -deserializing it: +If the data is untrusted, use \verb'GxB_*_fprint' to +check the matrix or vector after deserializing it: {\footnotesize \begin{verbatim} @@ -12315,7 +12351,7 @@ \subsubsection{{\sf GrB\_Matrix\_select:} apply a select operator to a matrix} \end{verbatim} } \end{mdframed} \verb'GrB_Matrix_select_*' applies a \verb'GrB_IndexUnaryOp' operator to the -entries of a vector. If the operator evaluates as \verb'true' for the entry +entries of a matrix. If the operator evaluates as \verb'true' for the entry \verb'A(i,j)', it is copied to the matrix \verb'T', or not copied if the operator evaluates to \verb'false'. The input matrix \verb'A' may be transposed first. The entries in \verb'A' are typecasted into the \verb'xtype' @@ -13873,8 +13909,7 @@ \subsection{Using iso matrices and vectors in a graph algorithm} // x = max (A) where x(i) = max (A (i,:)) GrB_mxv (x, NULL, NULL, GrB_MAX_FIRST_SEMIRING_FP64, A, y, NULL) ; // D = diag (x) - GrB_Matrix_new (&D, GrB_FP64, n, n) ; - GrB_Matrix_diag (D, x, 0) ; + GrB_Matrix_diag (&D, x, 0) ; // G = D*A using the ANY_EQ semiring GrB_Matrix_new (&G, GrB_BOOL, n, n) ; GrB_mxm (G, NULL, NULL, GxB_ANY_EQ_FP64, D, A, NULL) ; @@ -15142,7 +15177,8 @@ \subsection{On Linux and Mac} make CMAKE_OPTIONS='-DGBNCPUFEAT=1 -DGBAVX2=1' JOBS=40 \end{verbatim} } After compiling the library, you can compile the demos with -\verb'make all' and then \verb'make run'. +\verb'make all' and then \verb'make run' while in the top-level +GraphBLAS folder. If \verb'cmake' or \verb'make' fail, it might be that your default compiler does not support ANSI C11. Try another compiler. 
For example, try one of @@ -15613,10 +15649,12 @@ \subsection{Running the Demos} %---------------------------------------- After \verb'make' in the top-level directory to compile the library, type -\verb'make run' to run the demos. You can also run the demos after compiling: +\verb'make run' to run the demos (also in the top-level directory). +You can also run the demos after compiling with \verb'make all': {\small \begin{verbatim} + make all cd Demo ./demo \end{verbatim} } diff --git a/GraphBLAS/Doc/GraphBLAS_version.tex b/GraphBLAS/Doc/GraphBLAS_version.tex index 653d40675a..c476f7511c 100644 --- a/GraphBLAS/Doc/GraphBLAS_version.tex +++ b/GraphBLAS/Doc/GraphBLAS_version.tex @@ -1,5 +1,5 @@ % version of SuiteSparse:GraphBLAS \date{VERSION -6.2.5, -Mar 14, 2022} +7.0.3, +Apr 8, 2022} diff --git a/GraphBLAS/Doc/toms_parallel_grb.pdf b/GraphBLAS/Doc/toms_parallel_grb.pdf deleted file mode 100644 index 167e37d2e3..0000000000 Binary files a/GraphBLAS/Doc/toms_parallel_grb.pdf and /dev/null differ diff --git a/GraphBLAS/Doc/toms_parallel_grb2.pdf b/GraphBLAS/Doc/toms_parallel_grb2.pdf new file mode 100644 index 0000000000..9bec761767 Binary files /dev/null and b/GraphBLAS/Doc/toms_parallel_grb2.pdf differ diff --git a/GraphBLAS/GraphBLAS/@GrB/private/mexfunctions/gbargminmax.c b/GraphBLAS/GraphBLAS/@GrB/private/mexfunctions/gbargminmax.c index f136cbc427..0cace365d8 100644 --- a/GraphBLAS/GraphBLAS/@GrB/private/mexfunctions/gbargminmax.c +++ b/GraphBLAS/GraphBLAS/@GrB/private/mexfunctions/gbargminmax.c @@ -81,13 +81,12 @@ static void gb_argminmax // D = diag (x) //-------------------------------------------------------------------------- - // note: typecasting from an m-by-1 GrB_Matrix to a GrB_Vector is + // note: typecasting from an m-by-1 GrB_Matrix to a GrB_Vector is // not allowed by the GraphBLAS C API, but it can be done in SuiteSparse. // A more portable method would construct x as a GrB_Vector, // but using x as a GrB_Matrix simplifies the gb_export. 
- OK (GrB_Matrix_new (&D, type, m, m)) ; - OK (GrB_Matrix_diag (D, (GrB_Vector) *x, 0)) ; + OK (GrB_Matrix_diag (&D, (GrB_Vector) *x, 0)) ; //-------------------------------------------------------------------------- // compute G, where G(i,j)=1 if A(i,j) is the min/max in its row/col diff --git a/GraphBLAS/GraphBLAS/@GrB/private/mexfunctions/gbmdiag.c b/GraphBLAS/GraphBLAS/@GrB/private/mexfunctions/gbmdiag.c index 557587ca38..026ea76535 100644 --- a/GraphBLAS/GraphBLAS/@GrB/private/mexfunctions/gbmdiag.c +++ b/GraphBLAS/GraphBLAS/@GrB/private/mexfunctions/gbmdiag.c @@ -84,14 +84,7 @@ void mexFunction // compute C = diag (v, k) //-------------------------------------------------------------------------- - if (desc == NULL) - { - OK1 (C, GrB_Matrix_diag (C, (GrB_Vector) V, k)) ; - } - else - { - OK1 (C, GxB_Matrix_diag (C, (GrB_Vector) V, k, desc)) ; - } + OK1 (C, GxB_Matrix_diag (C, (GrB_Vector) V, k, desc)) ; //-------------------------------------------------------------------------- // free shallow copies diff --git a/GraphBLAS/GraphBLAS/CMakeLists.txt b/GraphBLAS/GraphBLAS/CMakeLists.txt index 16c43d13d7..98eb0cb075 100644 --- a/GraphBLAS/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/GraphBLAS/CMakeLists.txt @@ -29,10 +29,10 @@ endif ( ) set ( CMAKE_MACOSX_RPATH TRUE ) # version of SuiteSparse:GraphBLAS (must match ../CMakeLists.txt) -set ( GraphBLAS_DATE "Mar 14, 2022" ) -set ( GraphBLAS_VERSION_MAJOR 6 ) -set ( GraphBLAS_VERSION_MINOR 2 ) -set ( GraphBLAS_VERSION_SUB 5 ) +set ( GraphBLAS_DATE "Apr 8, 2022" ) +set ( GraphBLAS_VERSION_MAJOR 7 ) +set ( GraphBLAS_VERSION_MINOR 0 ) +set ( GraphBLAS_VERSION_SUB 3 ) message ( STATUS "Building SuiteSparse:GraphBLAS version: v" ${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB} " date: " ${GraphBLAS_DATE} ) diff --git a/GraphBLAS/Include/GraphBLAS.h b/GraphBLAS/Include/GraphBLAS.h index 4ed26f3103..6d1d4b3498 100644 --- a/GraphBLAS/Include/GraphBLAS.h +++ b/GraphBLAS/Include/GraphBLAS.h @@ 
-221,10 +221,10 @@ // The version of this implementation, and the GraphBLAS API version: #define GxB_IMPLEMENTATION_NAME "SuiteSparse:GraphBLAS" -#define GxB_IMPLEMENTATION_DATE "Mar 14, 2022" -#define GxB_IMPLEMENTATION_MAJOR 6 -#define GxB_IMPLEMENTATION_MINOR 2 -#define GxB_IMPLEMENTATION_SUB 5 +#define GxB_IMPLEMENTATION_DATE "Apr 8, 2022" +#define GxB_IMPLEMENTATION_MAJOR 7 +#define GxB_IMPLEMENTATION_MINOR 0 +#define GxB_IMPLEMENTATION_SUB 3 #define GxB_SPEC_DATE "Nov 15, 2021" #define GxB_SPEC_MAJOR 2 #define GxB_SPEC_MINOR 0 @@ -285,7 +285,7 @@ typedef uint64_t GrB_Index ; // GxB_INDEX_MAX is historical; use GrB_INDEX_MAX+1 instead. It differs by one // from GrB_INDEX_MAX, since it defined the largest valid matrix or vector -// dimension. +// dimension. #define GxB_INDEX_MAX ((GrB_Index) (1ULL << 60)) //============================================================================== @@ -1148,18 +1148,18 @@ GB_PUBLIC GrB_BinaryOp // the same type. The value z is either 1 for true or 0 for false, but it // is a value with the same type as x and y. 
- // z = (x == y) z = (x != y) - GxB_ISEQ_BOOL, GxB_ISNE_BOOL, - GxB_ISEQ_INT8, GxB_ISNE_INT8, - GxB_ISEQ_INT16, GxB_ISNE_INT16, - GxB_ISEQ_INT32, GxB_ISNE_INT32, - GxB_ISEQ_INT64, GxB_ISNE_INT64, - GxB_ISEQ_UINT8, GxB_ISNE_UINT8, - GxB_ISEQ_UINT16, GxB_ISNE_UINT16, - GxB_ISEQ_UINT32, GxB_ISNE_UINT32, - GxB_ISEQ_UINT64, GxB_ISNE_UINT64, - GxB_ISEQ_FP32, GxB_ISNE_FP32, - GxB_ISEQ_FP64, GxB_ISNE_FP64, + // z = (x == y) z = (x != y) + GxB_ISEQ_BOOL, GxB_ISNE_BOOL, + GxB_ISEQ_INT8, GxB_ISNE_INT8, + GxB_ISEQ_INT16, GxB_ISNE_INT16, + GxB_ISEQ_INT32, GxB_ISNE_INT32, + GxB_ISEQ_INT64, GxB_ISNE_INT64, + GxB_ISEQ_UINT8, GxB_ISNE_UINT8, + GxB_ISEQ_UINT16, GxB_ISNE_UINT16, + GxB_ISEQ_UINT32, GxB_ISNE_UINT32, + GxB_ISEQ_UINT64, GxB_ISNE_UINT64, + GxB_ISEQ_FP32, GxB_ISNE_FP32, + GxB_ISEQ_FP64, GxB_ISNE_FP64, // complex: GxB_ISEQ_FC32, GxB_ISNE_FC32, GxB_ISEQ_FC64, GxB_ISNE_FC64, @@ -4251,16 +4251,26 @@ GrB_Info GxB_Matrix_split // split a matrix into 2D array of matrices // GxB_Matrix_diag, GxB_Vector_diag, GrB_Matrix_diag //------------------------------------------------------------------------------ -// GxB_Matrix_diag constructs a matrix from a vector. Let n be the length of -// the v vector, from GrB_Vector_size (&n, v). If k = 0, then C is an n-by-n -// diagonal matrix with the entries from v along the main diagonal of C, with -// C(i,i) = v(i). If k is nonzero, C is square with dimension n+abs(k). If k -// is positive, it denotes diagonals above the main diagonal, with C(i,i+k) = -// v(i). If k is negative, it denotes diagonals below the main diagonal of C, -// with C(i-k,i) = v(i). +// GrB_Matrix_diag constructs a new matrix from a vector. Let n be the length +// of the v vector, from GrB_Vector_size (&n, v). If k = 0, then C is an +// n-by-n diagonal matrix with the entries from v along the main diagonal of C, +// with C(i,i) = v(i). If k is nonzero, C is square with dimension n+abs(k). 
+// If k is positive, it denotes diagonals above the main diagonal, with +// C(i,i+k) = v(i). If k is negative, it denotes diagonals below the main +// diagonal of C, with C(i-k,i) = v(i). C is constructed with the same type +// as v. -// C must already exist on input, of the correct size. Any existing entries in -// C are discarded. The type of C is preserved, so that if the type of C and v +GB_PUBLIC +GrB_Info GrB_Matrix_diag // build a diagonal matrix from a vector +( + GrB_Matrix *C, // output matrix + const GrB_Vector v, // input vector + int64_t k +) ; + +// GxB_Matrix_diag is like GrB_Matrix_diag (&C, v, k), except that C must +// already exist on input, of the correct size. Any existing entries in C are +// discarded. The type of C is preserved, so that if the type of C and v // differ, the entries are typecasted into the type of C. Any settings made to // C by GxB_Matrix_Option_set (format by row or by column, bitmap switch, hyper // switch, and sparsity control) are unchanged. @@ -4274,22 +4284,11 @@ GrB_Info GxB_Matrix_diag // construct a diagonal matrix from a vector const GrB_Descriptor desc // to specify # of threads ) ; -// GrB_Matrix_diag is identical to GxB_Matrix_diag (C, v, k, NULL), -// using the default # of threads from the global setting. - -GB_PUBLIC -GrB_Info GrB_Matrix_diag // construct a diagonal matrix from a vector -( - GrB_Matrix C, // output matrix - const GrB_Vector v, // input vector - int64_t k -) ; - // GxB_Vector_diag extracts a vector v from an input matrix A, which may be // rectangular. If k = 0, the main diagonal of A is extracted; k > 0 denotes // diagonals above the main diagonal of A, and k < 0 denotes diagonals below // the main diagonal of A. Let A have dimension m-by-n. If k is in the range -// 0 to n-1, then v has length min(m,n-k). If k is negative and in the range +// 0 to n-1, then v has length min(m,n-k). If k is negative and in the range // -1 to -m+1, then v has length min(m+k,n).
If k is outside these ranges, // v has length 0 (this is not an error). @@ -5116,8 +5115,9 @@ GrB_Info GrB_Matrix_eWiseAdd_BinaryOp // C = accum (C, A+B) // GxB_eWiseUnion: a variant of GrB_eWiseAdd //============================================================================== -// GxB_eWiseUnion is a variant of eWiseAdd. They differ when an entry is -// present in A but not B, or in B but not A. +// GxB_eWiseUnion is a variant of eWiseAdd. The methods create a result with +// the same sparsity structure. They differ when an entry is present in A but +// not B, or in B but not A. // eWiseAdd does the following, for a matrix, where "+" is the add binary op: @@ -5128,7 +5128,7 @@ GrB_Info GrB_Matrix_eWiseAdd_BinaryOp // C = accum (C, A+B) // else if B(i,j) is present but not A(i,j) // C(i,j) = B(i,j) -// by constrast, eWiseUnion always applies the operator: +// by contrast, eWiseUnion always applies the operator: // if A(i,j) and B(i,j) are both present: // C(i,j) = A(i,j) + B(i,j) @@ -9268,8 +9268,8 @@ GB_PUBLIC GrB_Semiring // 64 bitwise semirings //------------------------------------------------------------------------------ - // monoids: (BOR, BAND, BXOR, BXNOR) x - // mult: (BOR, BAND, BXOR, BXNOR) x + // monoids: (BOR, BAND, BXOR, BXNOR) x + // mult: (BOR, BAND, BXOR, BXNOR) x // types: (UINT8, UINT16, UINT32, UINT64) GxB_BOR_BOR_UINT8 , GxB_BOR_BOR_UINT16 , GxB_BOR_BOR_UINT32 , GxB_BOR_BOR_UINT64 , @@ -9364,7 +9364,7 @@ GB_PUBLIC GrB_Semiring // MIN_PLUS, MIN_TIMES, MIN_FIRST, MIN_SECOND, MIN_MAX, // MAX_PLUS, MAX_TIMES, MAX_FIRST, MAX_SECOND, MAX_MIN -// and 4 semirings for boolean only: +// and 4 semirings for boolean only: // LOR_LAND, LAND_LOR, LXOR_LAND, LXNOR_LOR. 
@@ -9386,8 +9386,8 @@ GB_PUBLIC GrB_Semiring GrB_PLUS_TIMES_SEMIRING_UINT16, // GxB_PLUS_TIMES_UINT16 GrB_PLUS_TIMES_SEMIRING_UINT32, // GxB_PLUS_TIMES_UINT32 GrB_PLUS_TIMES_SEMIRING_UINT64, // GxB_PLUS_TIMES_UINT64 - GrB_PLUS_TIMES_SEMIRING_FP32, // GxB_PLUS_TIMES_FP32 - GrB_PLUS_TIMES_SEMIRING_FP64, // GxB_PLUS_TIMES_FP64 + GrB_PLUS_TIMES_SEMIRING_FP32, // GxB_PLUS_TIMES_FP32 + GrB_PLUS_TIMES_SEMIRING_FP64, // GxB_PLUS_TIMES_FP64 // PLUS_MIN semirings for all 10 real, non-boolean types: GrB_PLUS_MIN_SEMIRING_INT8, // GxB_PLUS_MIN_INT8 @@ -9398,8 +9398,8 @@ GB_PUBLIC GrB_Semiring GrB_PLUS_MIN_SEMIRING_UINT16, // GxB_PLUS_MIN_UINT16 GrB_PLUS_MIN_SEMIRING_UINT32, // GxB_PLUS_MIN_UINT32 GrB_PLUS_MIN_SEMIRING_UINT64, // GxB_PLUS_MIN_UINT64 - GrB_PLUS_MIN_SEMIRING_FP32, // GxB_PLUS_MIN_FP32 - GrB_PLUS_MIN_SEMIRING_FP64, // GxB_PLUS_MIN_FP64 + GrB_PLUS_MIN_SEMIRING_FP32, // GxB_PLUS_MIN_FP32 + GrB_PLUS_MIN_SEMIRING_FP64, // GxB_PLUS_MIN_FP64 //-------------------------------------------------------------------------- // 50 semirings with MIN monoids @@ -9414,8 +9414,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_PLUS_SEMIRING_UINT16, // GxB_MIN_PLUS_UINT16 GrB_MIN_PLUS_SEMIRING_UINT32, // GxB_MIN_PLUS_UINT32 GrB_MIN_PLUS_SEMIRING_UINT64, // GxB_MIN_PLUS_UINT64 - GrB_MIN_PLUS_SEMIRING_FP32, // GxB_MIN_PLUS_FP32 - GrB_MIN_PLUS_SEMIRING_FP64, // GxB_MIN_PLUS_FP64 + GrB_MIN_PLUS_SEMIRING_FP32, // GxB_MIN_PLUS_FP32 + GrB_MIN_PLUS_SEMIRING_FP64, // GxB_MIN_PLUS_FP64 // MIN_TIMES semirings for all 10 real, non-boolean types: GrB_MIN_TIMES_SEMIRING_INT8, // GxB_MIN_TIMES_INT8 @@ -9426,8 +9426,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_TIMES_SEMIRING_UINT16, // GxB_MIN_TIMES_UINT16 GrB_MIN_TIMES_SEMIRING_UINT32, // GxB_MIN_TIMES_UINT32 GrB_MIN_TIMES_SEMIRING_UINT64, // GxB_MIN_TIMES_UINT64 - GrB_MIN_TIMES_SEMIRING_FP32, // GxB_MIN_TIMES_FP32 - GrB_MIN_TIMES_SEMIRING_FP64, // GxB_MIN_TIMES_FP64 + GrB_MIN_TIMES_SEMIRING_FP32, // GxB_MIN_TIMES_FP32 + GrB_MIN_TIMES_SEMIRING_FP64, // 
GxB_MIN_TIMES_FP64 // MIN_FIRST semirings for all 10 real, non-boolean types: GrB_MIN_FIRST_SEMIRING_INT8, // GxB_MIN_FIRST_INT8 @@ -9438,8 +9438,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_FIRST_SEMIRING_UINT16, // GxB_MIN_FIRST_UINT16 GrB_MIN_FIRST_SEMIRING_UINT32, // GxB_MIN_FIRST_UINT32 GrB_MIN_FIRST_SEMIRING_UINT64, // GxB_MIN_FIRST_UINT64 - GrB_MIN_FIRST_SEMIRING_FP32, // GxB_MIN_FIRST_FP32 - GrB_MIN_FIRST_SEMIRING_FP64, // GxB_MIN_FIRST_FP64 + GrB_MIN_FIRST_SEMIRING_FP32, // GxB_MIN_FIRST_FP32 + GrB_MIN_FIRST_SEMIRING_FP64, // GxB_MIN_FIRST_FP64 // MIN_SECOND semirings for all 10 real, non-boolean types: GrB_MIN_SECOND_SEMIRING_INT8, // GxB_MIN_SECOND_INT8 @@ -9450,8 +9450,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_SECOND_SEMIRING_UINT16, // GxB_MIN_SECOND_UINT16 GrB_MIN_SECOND_SEMIRING_UINT32, // GxB_MIN_SECOND_UINT32 GrB_MIN_SECOND_SEMIRING_UINT64, // GxB_MIN_SECOND_UINT64 - GrB_MIN_SECOND_SEMIRING_FP32, // GxB_MIN_SECOND_FP32 - GrB_MIN_SECOND_SEMIRING_FP64, // GxB_MIN_SECOND_FP64 + GrB_MIN_SECOND_SEMIRING_FP32, // GxB_MIN_SECOND_FP32 + GrB_MIN_SECOND_SEMIRING_FP64, // GxB_MIN_SECOND_FP64 // MIN_MAX semirings for all 10 real, non-boolean types: GrB_MIN_MAX_SEMIRING_INT8, // GxB_MIN_MAX_INT8 @@ -9462,8 +9462,8 @@ GB_PUBLIC GrB_Semiring GrB_MIN_MAX_SEMIRING_UINT16, // GxB_MIN_MAX_UINT16 GrB_MIN_MAX_SEMIRING_UINT32, // GxB_MIN_MAX_UINT32 GrB_MIN_MAX_SEMIRING_UINT64, // GxB_MIN_MAX_UINT64 - GrB_MIN_MAX_SEMIRING_FP32, // GxB_MIN_MAX_FP32 - GrB_MIN_MAX_SEMIRING_FP64, // GxB_MIN_MAX_FP64 + GrB_MIN_MAX_SEMIRING_FP32, // GxB_MIN_MAX_FP32 + GrB_MIN_MAX_SEMIRING_FP64, // GxB_MIN_MAX_FP64 //-------------------------------------------------------------------------- // 50 semirings with MAX monoids @@ -9478,8 +9478,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_PLUS_SEMIRING_UINT16, // GxB_MAX_PLUS_UINT16 GrB_MAX_PLUS_SEMIRING_UINT32, // GxB_MAX_PLUS_UINT32 GrB_MAX_PLUS_SEMIRING_UINT64, // GxB_MAX_PLUS_UINT64 - GrB_MAX_PLUS_SEMIRING_FP32, // GxB_MAX_PLUS_FP32 - GrB_MAX_PLUS_SEMIRING_FP64, // 
GxB_MAX_PLUS_FP64 + GrB_MAX_PLUS_SEMIRING_FP32, // GxB_MAX_PLUS_FP32 + GrB_MAX_PLUS_SEMIRING_FP64, // GxB_MAX_PLUS_FP64 // MAX_TIMES semirings for all 10 real, non-boolean types: GrB_MAX_TIMES_SEMIRING_INT8, // GxB_MAX_TIMES_INT8 @@ -9490,8 +9490,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_TIMES_SEMIRING_UINT16, // GxB_MAX_TIMES_UINT16 GrB_MAX_TIMES_SEMIRING_UINT32, // GxB_MAX_TIMES_UINT32 GrB_MAX_TIMES_SEMIRING_UINT64, // GxB_MAX_TIMES_UINT64 - GrB_MAX_TIMES_SEMIRING_FP32, // GxB_MAX_TIMES_FP32 - GrB_MAX_TIMES_SEMIRING_FP64, // GxB_MAX_TIMES_FP64 + GrB_MAX_TIMES_SEMIRING_FP32, // GxB_MAX_TIMES_FP32 + GrB_MAX_TIMES_SEMIRING_FP64, // GxB_MAX_TIMES_FP64 // MAX_FIRST semirings for all 10 real, non-boolean types: GrB_MAX_FIRST_SEMIRING_INT8, // GxB_MAX_FIRST_INT8 @@ -9502,8 +9502,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_FIRST_SEMIRING_UINT16, // GxB_MAX_FIRST_UINT16 GrB_MAX_FIRST_SEMIRING_UINT32, // GxB_MAX_FIRST_UINT32 GrB_MAX_FIRST_SEMIRING_UINT64, // GxB_MAX_FIRST_UINT64 - GrB_MAX_FIRST_SEMIRING_FP32, // GxB_MAX_FIRST_FP32 - GrB_MAX_FIRST_SEMIRING_FP64, // GxB_MAX_FIRST_FP64 + GrB_MAX_FIRST_SEMIRING_FP32, // GxB_MAX_FIRST_FP32 + GrB_MAX_FIRST_SEMIRING_FP64, // GxB_MAX_FIRST_FP64 // MAX_SECOND semirings for all 10 real, non-boolean types: GrB_MAX_SECOND_SEMIRING_INT8, // GxB_MAX_SECOND_INT8 @@ -9514,8 +9514,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_SECOND_SEMIRING_UINT16, // GxB_MAX_SECOND_UINT16 GrB_MAX_SECOND_SEMIRING_UINT32, // GxB_MAX_SECOND_UINT32 GrB_MAX_SECOND_SEMIRING_UINT64, // GxB_MAX_SECOND_UINT64 - GrB_MAX_SECOND_SEMIRING_FP32, // GxB_MAX_SECOND_FP32 - GrB_MAX_SECOND_SEMIRING_FP64, // GxB_MAX_SECOND_FP64 + GrB_MAX_SECOND_SEMIRING_FP32, // GxB_MAX_SECOND_FP32 + GrB_MAX_SECOND_SEMIRING_FP64, // GxB_MAX_SECOND_FP64 // MAX_MIN semirings for all 10 real, non-boolean types: GrB_MAX_MIN_SEMIRING_INT8, // GxB_MAX_MIN_INT8 @@ -9526,8 +9526,8 @@ GB_PUBLIC GrB_Semiring GrB_MAX_MIN_SEMIRING_UINT16, // GxB_MAX_MIN_UINT16 GrB_MAX_MIN_SEMIRING_UINT32, // GxB_MAX_MIN_UINT32 
GrB_MAX_MIN_SEMIRING_UINT64, // GxB_MAX_MIN_UINT64 - GrB_MAX_MIN_SEMIRING_FP32, // GxB_MAX_MIN_FP32 - GrB_MAX_MIN_SEMIRING_FP64, // GxB_MAX_MIN_FP64 + GrB_MAX_MIN_SEMIRING_FP32, // GxB_MAX_MIN_FP32 + GrB_MAX_MIN_SEMIRING_FP64, // GxB_MAX_MIN_FP64 //-------------------------------------------------------------------------- // 4 boolean semirings: @@ -11292,7 +11292,8 @@ GrB_Info GrB_Matrix_exportHint // suggest the best export format FILE *f = fopen ("myblob", "r") ; fread (&blob_size, sizeof (size_t), 1, f) ; blob = malloc (blob_size) ; - fread (&blob, sizeof (uint8_t), 1, f) ; + fread (blob, sizeof (uint8_t), blob_size, f) ; + fclose (f) ; char type_name [GxB_MAX_NAME_LEN] ; GxB_deserialize_type_name (type_name, blob, blob_size) ; printf ("blob type is: %s\n", type_name) ; @@ -11331,7 +11332,8 @@ GrB_Info GrB_Matrix_exportHint // suggest the best export format FILE *f = fopen ("myblob", "r") ; fread (&blob_size, sizeof (size_t), 1, f) ; blob = malloc (blob_size) ; - fread (&blob, sizeof (uint8_t), 1, f) ; + fread (blob, sizeof (uint8_t), blob_size, f) ; + fclose (f) ; // the user must know the type of A is MyQType GrB_Matrix_deserialize (&A, MyQtype, blob, blob_size) ; free (blob) ; @@ -11634,7 +11636,7 @@ struct GB_Iterator_opaque // only changes when the iterator is created: size_t header_size ; // size of this iterator object - // these components only change when the iterator is attached: + // these components only change when the iterator is attached: int64_t pmax ; // avlen*avdim for bitmap; nvals(A) otherwise int64_t avlen ; // length of each vector in the matrix int64_t avdim ; // number of vectors in the matrix dimension @@ -11895,7 +11897,7 @@ GrB_Index GxB_rowIterator_kount (GxB_Iterator iterator) ; // For SuiteSparse:GraphBLAS: If the matrix is hypersparse, and the row // does not appear in the hyperlist, then the iterator is moved to the first -// row after the given row that does appear in the hyperlist. 
+// row after the given row that does appear in the hyperlist. // The method is always successful; the following are conditions are returned: // GxB_EXHAUSTED: if the row index is >= nrows(A); the row iterator is @@ -12223,7 +12225,7 @@ GrB_Info GxB_Matrix_Iterator_next (GxB_Iterator iterator) ; // GxB_Matrix_Iterator_next. Results are undefined if these conditions are not // met. -GB_PUBLIC +GB_PUBLIC GrB_Index GxB_Matrix_Iterator_getp (GxB_Iterator iterator) ; //------------------------------------------------------------------------------ @@ -12236,7 +12238,7 @@ GrB_Index GxB_Matrix_Iterator_getp (GxB_Iterator iterator) ; // GxB_Matrix_Iterator_next, with a return value of GrB_SUCCESS. Results are // undefined if these conditions are not met. -GB_PUBLIC +GB_PUBLIC void GxB_Matrix_Iterator_getIndex ( GxB_Iterator iterator, @@ -12341,7 +12343,8 @@ GrB_Index GxB_Vector_Iterator_getpmax (GxB_Iterator iterator) ; // vector, or GxB_EXHAUSTED if the iterator is exhausted. GB_PUBLIC -GrB_Info GB_Vector_Iterator_bitmap_seek (GxB_Iterator iterator, GrB_Index p) ; +GrB_Info GB_Vector_Iterator_bitmap_seek (GxB_Iterator iterator, + GrB_Index unused) ; // unused parameter to be removed in v8.x GB_PUBLIC GrB_Info GxB_Vector_Iterator_seek (GxB_Iterator iterator, GrB_Index p) ; @@ -12360,7 +12363,7 @@ GrB_Info GxB_Vector_Iterator_seek (GxB_Iterator iterator, GrB_Index p) ; iterator->p = q, \ (iterator->A_sparsity == GxB_BITMAP) ? \ ( \ - GB_Vector_Iterator_bitmap_seek (iterator, q) \ + GB_Vector_Iterator_bitmap_seek (iterator, 0) \ ) \ : \ ( \ @@ -12401,7 +12404,16 @@ GrB_Info GxB_Vector_Iterator_next (GxB_Iterator iterator) ; ) \ : \ ( \ - GrB_SUCCESS \ + (iterator->A_sparsity == GxB_BITMAP) ? 
\ + ( \ + /* bitmap: seek to the next entry present in the bitmap */ \ + GB_Vector_Iterator_bitmap_seek (iterator, 0) \ + ) \ + : \ + ( \ + /* other formats: already at the next entry */ \ + GrB_SUCCESS \ + ) \ ) \ ) @@ -12515,5 +12527,47 @@ GB_PUBLIC void GxB_Iterator_get_UDT (GxB_Iterator iterator, (iterator)->type_size) \ ) +//------------------------------------------------------------------------------ +// Rapids Memory Manager wrappers for SuiteSparse:GraphBLAS +//------------------------------------------------------------------------------ + +#ifndef RMM_WRAP_H +#define RMM_WRAP_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// TODO describe the modes +typedef enum { rmm_wrap_host=0, rmm_wrap_host_pinned=1, rmm_wrap_device=2, rmm_wrap_managed=3 } RMM_MODE ; + +void rmm_wrap_finalize (void) ; +int rmm_wrap_initialize (RMM_MODE mode, size_t init_pool_size, size_t max_pool_size) ; + +// example usage: + // rmm_wrap_initialize (rmm_wrap_managed, INT32_MAX, INT64_MAX) ; + // GxB_init (GrB_NONBLOCKING, rmm_wrap_malloc, rmm_wrap_calloc, rmm_wrap_realloc, rmm_wrap_free) ; + // use GraphBLAS ... + // GrB_finalize ( ) ; + // rmm_wrap_finalize ( ) ; + +// The two PMR-based allocate/deallocate signatures (C-style): +void *rmm_wrap_allocate (size_t *size) ; +void rmm_wrap_deallocate (void *p, size_t size) ; + +// The four malloc/calloc/realloc/free signatures: +void *rmm_wrap_malloc (size_t size) ; +void *rmm_wrap_calloc (size_t n, size_t size) ; +void *rmm_wrap_realloc (void *p, size_t newsize) ; +void rmm_wrap_free (void *p) ; + +#ifdef __cplusplus +} +#endif +#endif + #endif diff --git a/GraphBLAS/README.md b/GraphBLAS/README.md index 88992604dd..18228c695d 100644 --- a/GraphBLAS/README.md +++ b/GraphBLAS/README.md @@ -8,7 +8,7 @@ For the GraphBLAS/GraphBLAS Octave/MATLAB interface *only*: SPDX-License-Identifier: GPL-3.0-or-later (see below for a discussion of the licensing of this package). 
-VERSION 6.2.5, Mar 14, 2022 +VERSION 7.0.3, Apr 8, 2022 SuiteSparse:GraphBLAS is a complete implementation of the GraphBLAS standard, which defines a set of sparse matrix operations on an extended algebra of @@ -55,9 +55,9 @@ To remove all compiled files: make clean -To compile the demos: +To compile and run the demos: - make all + make run See the GraphBLAS/ subfolder for the Octave/MATLAB interface, which contains a README.md file with further details. diff --git a/GraphBLAS/Source/GB_AxB_meta.c b/GraphBLAS/Source/GB_AxB_meta.c index 3998dd3215..bdfe55c47d 100644 --- a/GraphBLAS/Source/GB_AxB_meta.c +++ b/GraphBLAS/Source/GB_AxB_meta.c @@ -234,7 +234,7 @@ GrB_Info GB_AxB_meta // C=A*B meta algorithm // B is treated just like A if (!B_in->is_csc) { - // Flip the sense of A_transpose + // Flip the sense of B_transpose B_transpose = !B_transpose ; } diff --git a/GraphBLAS/Source/GB_AxB_saxpy3_slice_balanced.c b/GraphBLAS/Source/GB_AxB_saxpy3_slice_balanced.c index 9a488f347a..dd22a05881 100644 --- a/GraphBLAS/Source/GB_AxB_saxpy3_slice_balanced.c +++ b/GraphBLAS/Source/GB_AxB_saxpy3_slice_balanced.c @@ -400,11 +400,11 @@ GrB_Info GB_AxB_saxpy3_slice_balanced // give preference to Gustavson when using few threads //-------------------------------------------------------------------------- - if ((*nthreads) <= 8 && + if (/* (*nthreads) <= 8 && */ (!(AxB_method == GxB_AxB_HASH || AxB_method == GxB_AxB_GUSTAVSON))) { // Unless a specific method has been explicitly requested, see if - // Gustavson should be used with a small number of threads. + // Gustavson should be used. // Matrix-vector has a maximum intensity of 1, so this heuristic only // applies to GrB_mxm. 
double abnz = GB_nnz (A) + GB_nnz (B) + 1 ; @@ -412,12 +412,13 @@ GrB_Info GB_AxB_saxpy3_slice_balanced double intensity = total_flops / abnz ; GBURBLE ("(intensity: %0.3g workspace/(nnz(A)+nnz(B)): %0.3g", intensity, workspace / abnz) ; - if (intensity >= 8 && workspace < abnz) + if (((*nthreads) <= 8 && intensity >= 8 && workspace < abnz) + || ( intensity >= 16 && workspace < abnz)) { // work intensity is large, and Gustvason workspace is modest; // use Gustavson for all tasks AxB_method = GxB_AxB_GUSTAVSON ; - GBURBLE (": select Gustvason) ") ; + GBURBLE (": all Gustvason) ") ; } else { diff --git a/GraphBLAS/Source/GB_Element.h b/GraphBLAS/Source/GB_Element.h index 8bd0b78aa1..c9a0e60509 100644 --- a/GraphBLAS/Source/GB_Element.h +++ b/GraphBLAS/Source/GB_Element.h @@ -13,7 +13,9 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar ( GrB_Matrix C, // matrix to modify - void *scalar, // scalar to set + const GrB_BinaryOp accum, // if NULL: C(row,col) = scalar + // else: C(row,col) += scalar + const void *scalar, // scalar to set const GrB_Index row, // row index const GrB_Index col, // column index const GB_Type_code scalar_code, // type of the scalar diff --git a/GraphBLAS/Source/GB_Matrix_diag.c b/GraphBLAS/Source/GB_Matrix_diag.c index 6729af5f55..ae4e826bb3 100644 --- a/GraphBLAS/Source/GB_Matrix_diag.c +++ b/GraphBLAS/Source/GB_Matrix_diag.c @@ -20,7 +20,7 @@ #include "GB_diag.h" -GrB_Info GB_Matrix_diag // construct a diagonal matrix from a vector +GrB_Info GB_Matrix_diag // build a diagonal matrix from a vector ( GrB_Matrix C, // output matrix const GrB_Matrix V_in, // input vector (as an n-by-1 matrix) @@ -45,23 +45,11 @@ GrB_Info GB_Matrix_diag // construct a diagonal matrix from a vector GrB_Type ctype = C->type ; GrB_Type vtype = V_in->type ; - int64_t nrows = GB_NROWS (C) ; - int64_t ncols = GB_NCOLS (C) ; int64_t n = V_in->vlen + GB_IABS (k) ; // C must be n-by-n - if (nrows != ncols || nrows != n) - { - GB_ERROR 
(GrB_DIMENSION_MISMATCH, - "Input matrix is " GBd "-by-" GBd " but must be " - GBd "-by-" GBd "\n", nrows, ncols, n, n) ; - } - - if (!GB_Type_compatible (ctype, vtype)) - { - GB_ERROR (GrB_DOMAIN_MISMATCH, "Input vector of type [%s] " - "cannot be typecast to output of type [%s]\n", - vtype->name, ctype->name) ; - } + ASSERT (GB_NROWS (C) == GB_NCOLS (C)) + ASSERT (GB_NROWS (C) == n) + ASSERT (GB_Type_compatible (ctype, vtype)) ; //-------------------------------------------------------------------------- // finish any pending work in V_in and clear the output matrix C diff --git a/GraphBLAS/Source/GB_Matrix_new.c b/GraphBLAS/Source/GB_Matrix_new.c new file mode 100644 index 0000000000..4d34f32f80 --- /dev/null +++ b/GraphBLAS/Source/GB_Matrix_new.c @@ -0,0 +1,78 @@ +//------------------------------------------------------------------------------ +// GB_Matrix_new: create a new matrix +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//------------------------------------------------------------------------------ + +// The new matrix is nrows-by-ncols, with no entries in it. Default format for +// an empty matrix is hypersparse CSC: A->p is size 2 and all zero, A->h is +// size 1, A->plen is 1, and contents A->x and A->i are NULL. If this method +// fails, *A is set to NULL. 
+ +#include "GB.h" + +GrB_Info GB_Matrix_new // create a new matrix with no entries +( + GrB_Matrix *A, // handle of matrix to create + GrB_Type type, // type of matrix to create + GrB_Index nrows, // matrix dimension is nrows-by-ncols + GrB_Index ncols, + GB_Context Context +) +{ + + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + GB_RETURN_IF_NULL (A) ; + (*A) = NULL ; + GB_RETURN_IF_NULL_OR_FAULTY (type) ; + + if (nrows > GB_NMAX || ncols > GB_NMAX) + { + // problem too large + return (GrB_INVALID_VALUE) ; + } + + //-------------------------------------------------------------------------- + // create the matrix + //-------------------------------------------------------------------------- + + int64_t vlen, vdim ; + bool A_is_csc ; + if (ncols == 1) + { + // n-by-1 matrices are always held by column, including 1-by-1 + A_is_csc = true ; + } + else if (nrows == 1) + { + // 1-by-n matrices (except 1-by-1) are always held by row + A_is_csc = false ; + } + else + { + // m-by-n (including 0-by-0) with m != 1 and n != 1 use the global setting + A_is_csc = GB_Global_is_csc_get ( ) ; + } + + if (A_is_csc) + { + vlen = (int64_t) nrows ; + vdim = (int64_t) ncols ; + } + else + { + vlen = (int64_t) ncols ; + vdim = (int64_t) nrows ; + } + + return (GB_new (A, // auto sparsity, new header + type, vlen, vdim, GB_Ap_calloc, A_is_csc, GxB_AUTO_SPARSITY, + GB_Global_hyper_switch_get ( ), 1, Context)) ; +} + diff --git a/GraphBLAS/Source/GB_assign_prep.c b/GraphBLAS/Source/GB_assign_prep.c index d5af91509c..efac9a6190 100644 --- a/GraphBLAS/Source/GB_assign_prep.c +++ b/GraphBLAS/Source/GB_assign_prep.c @@ -1137,8 +1137,7 @@ GrB_Info GB_assign_prep //---------------------------------------------------------------------- if (!wait) - { - + { // ( delete ) will not occur, but new pending tuples may be added // via the action: ( insert ).
Check if the accum operator is the // same as the prior pending operator and ensure the types are @@ -1147,33 +1146,18 @@ GrB_Info GB_assign_prep ASSERT (C->Pending != NULL) ; ASSERT (C->Pending->type != NULL) ; - if (atype != C->Pending->type) - { + wait = // entries in A are copied directly into the list of pending // tuples for C, with no typecasting. The type of the prior - // pending tuples must match the type of A. Since the types - // do not match, prior updates must be assembled first. - wait = true ; - } - else if - ( - // the types match, now check the pending operator - ! ( - // the operators are the same - (accum == C->Pending->op) - // or both operators are SECOND_Ctype, implicit or explicit + // pending tuples must match the type of A. If the types do + // not match, prior updates must be assembled first. + (atype != C->Pending->type) + // also wait if the pending operator has changed. + || !((accum == C->Pending->op) || (GB_op_is_second (accum, ctype) && - GB_op_is_second (C->Pending->op, ctype)) - ) - ) - { - wait = true ; - } - else if (C->iso != C_iso_out) - { - // the iso property of C is changing - wait = true ; - } + GB_op_is_second (C->Pending->op, ctype))) + // also wait if the iso property of C changes. 
+ || (C->iso != C_iso_out) ; } } diff --git a/GraphBLAS/Source/GB_assign_scalar.c b/GraphBLAS/Source/GB_assign_scalar.c index 7ec19f28fa..8c70fa58a1 100644 --- a/GraphBLAS/Source/GB_assign_scalar.c +++ b/GraphBLAS/Source/GB_assign_scalar.c @@ -35,13 +35,15 @@ GrB_Info GB_assign_scalar // C(Rows,Cols) += x const GrB_Descriptor desc, // descriptor for C and M GB_Context Context ) -{ +{ //-------------------------------------------------------------------------- // check inputs //-------------------------------------------------------------------------- GB_RETURN_IF_NULL (scalar) ; + GB_RETURN_IF_NULL (Rows) ; + GB_RETURN_IF_NULL (Cols) ; ASSERT (scalar_code <= GB_UDT_code) ; // get the descriptor @@ -55,18 +57,27 @@ GrB_Info GB_assign_scalar // C(Rows,Cols) += x // C(Rows,Cols) = accum (C(Rows,Cols), scalar) //-------------------------------------------------------------------------- - return (GB_assign ( - C, C_replace, // C matrix and its descriptor - M, Mask_comp, Mask_struct, // mask matrix and its descriptor - false, // do not transpose the mask - accum, // for accum (C(Rows,Cols),scalar) - NULL, false, // no explicit matrix A - Rows, nRows, // row indices - Cols, nCols, // column indices - true, // do scalar expansion - scalar, // scalar to assign, expands to become A - scalar_code, // type code of scalar to expand - GB_ASSIGN, - Context)) ; + if (M == NULL && !Mask_comp && nRows == 1 && nCols == 1 && !C_replace) + { + // C(i,j) = scalar or C(i,j) += scalar + return (GB_setElement (C, accum, scalar, Rows [0], Cols [0], + scalar_code, Context)) ; + } + else + { + return (GB_assign ( + C, C_replace, // C matrix and its descriptor + M, Mask_comp, Mask_struct, // mask matrix and its descriptor + false, // do not transpose the mask + accum, // for accum (C(Rows,Cols),scalar) + NULL, false, // no explicit matrix A + Rows, nRows, // row indices + Cols, nCols, // column indices + true, // do scalar expansion + scalar, // scalar to assign, expands to become A + 
scalar_code, // type code of scalar to expand + GB_ASSIGN, + Context)) ; + } } diff --git a/GraphBLAS/Source/GB_diag.h b/GraphBLAS/Source/GB_diag.h index 18c775adcf..4065f9a927 100644 --- a/GraphBLAS/Source/GB_diag.h +++ b/GraphBLAS/Source/GB_diag.h @@ -11,7 +11,7 @@ #define GB_DIAG_H #include "GB.h" -GrB_Info GB_Matrix_diag // construct a diagonal matrix from a vector +GrB_Info GB_Matrix_diag // build a diagonal matrix from a vector ( GrB_Matrix C, // output matrix const GrB_Matrix V_input, // input vector (as an n-by-1 matrix) diff --git a/GraphBLAS/Source/GB_new.h b/GraphBLAS/Source/GB_new.h index 3e0a24ae34..595e0556aa 100644 --- a/GraphBLAS/Source/GB_new.h +++ b/GraphBLAS/Source/GB_new.h @@ -18,6 +18,16 @@ typedef enum // input parameter to GB_new and GB_new_bix } GB_Ap_code ; +GB_PUBLIC +GrB_Info GB_Matrix_new // create a new matrix with no entries +( + GrB_Matrix *A, // handle of matrix to create + GrB_Type type, // type of matrix to create + GrB_Index nrows, // matrix dimension is nrows-by-ncols + GrB_Index ncols, + GB_Context Context +) ; + GB_PUBLIC GrB_Info GB_new // create matrix, except for indices & values ( diff --git a/GraphBLAS/Source/GB_setElement.c b/GraphBLAS/Source/GB_setElement.c index aac1d35add..031f112ce4 100644 --- a/GraphBLAS/Source/GB_setElement.c +++ b/GraphBLAS/Source/GB_setElement.c @@ -7,20 +7,24 @@ //------------------------------------------------------------------------------ -// Sets the value of single scalar, C(row,col) = scalar, typecasting from the -// type of scalar to the type of C, as needed. Not user-callable; does the -// work for all GrB_*_setElement* functions. +// Sets the value of single scalar, C(row,col) = scalar, or C(row,col)+=scalar, +// typecasting from the type of scalar to the type of C, as needed. Not +// user-callable; does the work for all GrB_*_setElement* functions, and for +// GrB_*assign when a single entry is modified. 
// If C(row,col) is already present in the matrix, its value is overwritten // with the scalar. Otherwise, if the mode determined by GrB_init is // non-blocking, the tuple (i,j,scalar) is appended to a list of pending tuples // to C. GB_wait assembles these pending tuples. -// GrB_setElement is the same as GrB_*assign with an implied SECOND accum -// operator whose ztype, xtype, and ytype are the same as C, with I=i, J=1, a -// 1-by-1 dense matrix A (where nnz (A) == 1), no mask, mask not complemented, -// C_replace effectively false (its value is ignored), and A transpose -// effectively false (since transposing a scalar has no effect). +// GrB_setElement when accum is NULL is the same as GrB_*assign with an implied +// SECOND accum operator whose ztype, xtype, and ytype are the same as C, with +// I=i, J=j, a 1-by-1 dense matrix A (where nnz (A) == 1), no mask, mask not +// complemented, C_replace effectively false (its value is ignored), and A +// transpose effectively false (since transposing a scalar has no effect). + +// GrB_setElement when accum is not NULL uses the accum operator instead of +// the implied SECOND operator. 
// Compare this function with GrB_*_extractElement_* @@ -31,7 +35,9 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar ( GrB_Matrix C, // matrix to modify - void *scalar, // scalar to set + const GrB_BinaryOp accum, // if NULL: C(row,col) = scalar + // else: C(row,col) += scalar + const void *scalar, // scalar to set const GrB_Index row, // row index const GrB_Index col, // column index const GB_Type_code scalar_code, // type of the scalar @@ -74,6 +80,14 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar GB_code_string (scalar_code), ctype->name) ; } + if (accum != NULL) + { + // C and scalar must be compatible with the accum operator + GB_RETURN_IF_FAULTY_OR_POSITIONAL (accum) ; + GB_OK (GB_BinaryOp_compatible (accum, ctype, ctype, NULL, scalar_code, + Context)) ; + } + // pending tuples and zombies are expected, and C might be jumbled too ASSERT (GB_JUMBLED_OK (C)) ; ASSERT (GB_PENDING_OK (C)) ; @@ -110,9 +124,13 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar // typecast the scalar and compare with the iso value of C //---------------------------------------------------------------------- - // s = (ctype) scalar bool convert_to_non_iso ; - if (ctype != stype) + if (accum != NULL) + { + // C(i,j) += scalar always converts C to non-iso + convert_to_non_iso = true ; + } + else if (ctype != stype) { // s = (ctype) scalar GB_void s [GB_VLA(csize)] ; @@ -138,14 +156,14 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar } } - else if (GB_nnz (C) == 0 && !C_is_full && C->Pending == NULL) + else if (GB_nnz (C) == 0 && !C_is_full && C->Pending == NULL + && accum == NULL) { //---------------------------------------------------------------------- // C is empty: this is the first setElement, convert C to iso //---------------------------------------------------------------------- - // s = (ctype) scalar if (ctype != stype) { // s = (ctype) scalar @@ -234,14 +252,49 @@ GrB_Info GB_setElement // set a 
single entry, C(row,col) = scalar // C (i,j) found //---------------------------------------------------------------------- - // if not zombie: action: ( =A ): copy A into C - // else action: ( undelete ): bring a zombie back to life + // if not zombie: + // no accum: action: ( =A ): copy A into C + // with accum: action: ( C+=A ): accumulate A into C + // else action: ( undelete ): bring a zombie back to life + + int8_t cb = (C_is_bitmap) ? C->b [pleft] : 0 ; if (!C->iso) { - // typecast or copy the scalar into C(i,j) void *cx = ((GB_void *) C->x) + (pleft*csize) ; - GB_cast_scalar (cx, ccode, scalar, scalar_code, csize) ; + if (accum == NULL || is_zombie || (C_is_bitmap && cb == 0)) + { + // C(i,j) = (ctype) scalar + GB_cast_scalar (cx, ccode, scalar, scalar_code, csize) ; + } + else + { + // C(i,j) += scalar + GxB_binary_function faccum = accum->binop_function ; + + // TODO: no need to cast if types match + GB_cast_function cast_C_to_xaccum, cast_Z_to_yaccum, cast_zaccum_to_C ; + cast_C_to_xaccum = GB_cast_factory (accum->xtype->code, ctype->code) ; + cast_Z_to_yaccum = GB_cast_factory (accum->ytype->code, scalar_code) ; + cast_zaccum_to_C = GB_cast_factory (ctype->code, accum->ztype->code) ; + + // scalar workspace + GB_void xaccum [GB_VLA(accum->xtype->size)] ; + GB_void yaccum [GB_VLA(accum->ytype->size)] ; + GB_void zaccum [GB_VLA(accum->ztype->size)] ; + + // xaccum = (accum->xtype) cx + cast_C_to_xaccum (xaccum, cx, ctype->size) ; + + // yaccum = (accum->ytype) scalar + cast_Z_to_yaccum (yaccum, scalar, accum->ytype->size) ; + + // zaccum = xaccum "+" yaccum + faccum (zaccum, xaccum, yaccum) ; + + // cx = (ctype) zaccum + cast_zaccum_to_C (cx, zaccum, ctype->size) ; + } } if (is_zombie) @@ -253,7 +306,6 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar else if (C_is_bitmap) { // set the entry in the C bitmap - int8_t cb = C->b [pleft] ; C->nvals += (cb == 0) ; C->b [pleft] = 1 ; } @@ -295,12 +347,18 @@ GrB_Info GB_setElement // set a 
single entry, C(row,col) = scalar // pending tuples must be assembled first. wait = true ; } - else if (!GB_op_is_second (C->Pending->op, ctype)) + else if + ( + // the types match, now check the pending operator + ! ( + // the operators are the same + (accum == C->Pending->op) + // or both operators are SECOND_Ctype, implicit or explicit + || (GB_op_is_second (accum, ctype) && + GB_op_is_second (C->Pending->op, ctype)) + ) + ) { - // prior op is not SECOND: setElement uses an implicit - // SECOND_Ctype operator, which must match the operator of the - // prior pending tuples. If it doesn't match, prior pending - // tuples must be assembled first. wait = true ; } } @@ -323,9 +381,10 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar // repeat the search since the C(i,j) entry may have been in // the list of pending tuples. There are no longer any pending // tuples, so this recursion will only happen once. The - // pending operator will become the implicit SECOND_ctype, - // and the type of the pending tuples will become ctype. - return (GB_setElement (C, scalar, row, col, scalar_code, Context)) ; + // pending operator will become the implicit SECOND_ctype, or + // accum, and the type of the pending tuples will become stype. + return (GB_setElement (C, accum, scalar, row, col, scalar_code, + Context)) ; } else @@ -341,10 +400,10 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar // C (i,j) must be added to the list of pending tuples. // If this is the first pending tuple, then the type of pending // tuples becomes the type of this scalar, and the pending operator - // becomes NULL, which is the implicit SECOND_ctype operator. - + // becomes NULL, which is the implicit SECOND_ctype operator, + // or non-NULL if accum is present. 
if (!GB_Pending_add (&(C->Pending), C->iso, (GB_void *) scalar, - stype, NULL, i, j, C->vdim > 1, Context)) + stype, accum, i, j, C->vdim > 1, Context)) { // out of memory GB_phbix_free (C) ; @@ -355,7 +414,14 @@ GrB_Info GB_setElement // set a single entry, C(row,col) = scalar // if this was the first tuple, then the pending operator and // pending type have been defined - ASSERT (GB_op_is_second (C->Pending->op, ctype)) ; + if (accum == NULL) + { + ASSERT (GB_op_is_second (C->Pending->op, ctype)) ; + } + else + { + ASSERT (C->Pending->op == accum) ; + } ASSERT (C->Pending->type == stype) ; ASSERT (C->Pending->size == stype->size) ; diff --git a/GraphBLAS/Source/GB_subassign_IxJ_slice.c b/GraphBLAS/Source/GB_subassign_IxJ_slice.c index bf18514f38..ad13b09368 100644 --- a/GraphBLAS/Source/GB_subassign_IxJ_slice.c +++ b/GraphBLAS/Source/GB_subassign_IxJ_slice.c @@ -31,7 +31,7 @@ // M c r + - S 19: C(I,J) += x, with S // There are 10 methods that perform scalar assignment: the 6 listed above, and -// Methods 05, 07, 09, and 11. The latter 4 methods do do not need to iterate +// Methods 05, 07, 09, and 11. 
The latter 4 methods do not need to iterate // over the entire IxJ space, because of the mask M: // M - - - - - 05: C(I,J) = x diff --git a/GraphBLAS/Source/GB_subassign_scalar.c b/GraphBLAS/Source/GB_subassign_scalar.c index 3cdac1ecc8..89ce080c3c 100644 --- a/GraphBLAS/Source/GB_subassign_scalar.c +++ b/GraphBLAS/Source/GB_subassign_scalar.c @@ -34,13 +34,15 @@ GrB_Info GB_subassign_scalar // C(Rows,Cols) += x const GrB_Descriptor desc, // descriptor for C(Rows,Cols) and M GB_Context Context ) -{ +{ //-------------------------------------------------------------------------- // check inputs //-------------------------------------------------------------------------- GB_RETURN_IF_NULL (scalar) ; + GB_RETURN_IF_NULL (Rows) ; + GB_RETURN_IF_NULL (Cols) ; ASSERT (scalar_code <= GB_UDT_code) ; // get the descriptor @@ -54,17 +56,25 @@ GrB_Info GB_subassign_scalar // C(Rows,Cols) += x // C(Rows,Cols) = accum (C(Rows,Cols), scalar) //-------------------------------------------------------------------------- - return (GB_subassign ( - C, C_replace, // C matrix and its descriptor - M, Mask_comp, Mask_struct, // mask matrix and its descriptor - false, // do not transpose the mask - accum, // for accum (C(Rows,Cols),scalar) - NULL, false, // no explicit matrix A - Rows, nRows, // row indices - Cols, nCols, // column indices - true, // do scalar expansion - scalar, // scalar to assign, expands to become A - scalar_code, // type code of scalar to expand - Context)) ; + if (M == NULL && !Mask_comp && nRows == 1 && nCols == 1 && !C_replace) + { + // C(i,j) = scalar or C(i,j) += scalar + return (GB_setElement (C, accum, scalar, Rows [0], Cols [0], + scalar_code, Context)) ; + } + else + { + return (GB_subassign ( + C, C_replace, // C matrix and its descriptor + M, Mask_comp, Mask_struct, // mask matrix and its descriptor + false, // do not transpose the mask + accum, // for accum (C(Rows,Cols),scalar) + NULL, false, // no explicit matrix A + Rows, nRows, // row indices + Cols, 
nCols, // column indices + true, // do scalar expansion + scalar, // scalar to assign, expands to become A + scalar_code, // type code of scalar to expand + Context)) ; + } } - diff --git a/GraphBLAS/Source/GB_transpose.c b/GraphBLAS/Source/GB_transpose.c index 2d022fddac..8c2f78baad 100644 --- a/GraphBLAS/Source/GB_transpose.c +++ b/GraphBLAS/Source/GB_transpose.c @@ -267,6 +267,7 @@ GrB_Info GB_transpose // C=A', C=(ctype)A' or C=op(A') // with all entries present, no zombies, no pending tuples, and not // jumbled). T = A' is either bitmap or full. + GBURBLE ("(bitmap/full transpose) ") ; int T_sparsity = (A_is_bitmap) ? GxB_BITMAP : GxB_FULL ; bool T_cheap = // T can be done quickly if: (avlen == 1 || avdim == 1) // A is a row or column vector, @@ -351,6 +352,7 @@ GrB_Info GB_transpose // C=A', C=(ctype)A' or C=op(A') // transpose a vector (avlen-by-1) into a "row" matrix (1-by-avlen). // A must be sorted first. + GBURBLE ("(sparse vector transpose (a)) ") ; ASSERT_MATRIX_OK (A, "the vector A must already be sorted", GB0) ; ASSERT (!GB_JUMBLED (A)) ; @@ -464,6 +466,7 @@ GrB_Info GB_transpose // C=A', C=(ctype)A' or C=op(A') // transpose a "row" matrix (1-by-avdim) into a vector (avdim-by-1). 
// if A->vlen is 1, all vectors of A are implicitly sorted + GBURBLE ("(sparse vector transpose (b)) ") ; ASSERT_MATRIX_OK (A, "1-by-n input A already sorted", GB0) ; //---------------------------------------------------------------------- @@ -702,6 +705,8 @@ GrB_Info GB_transpose // C=A', C=(ctype)A' or C=op(A') // transpose via GB_builder //------------------------------------------------------------------ + GBURBLE ("(builder transpose) ") ; + //------------------------------------------------------------------ // allocate and create iwork //------------------------------------------------------------------ diff --git a/GraphBLAS/Source/GB_transpose_bucket.c b/GraphBLAS/Source/GB_transpose_bucket.c index b01e3c5aaa..761d74a4d6 100644 --- a/GraphBLAS/Source/GB_transpose_bucket.c +++ b/GraphBLAS/Source/GB_transpose_bucket.c @@ -179,6 +179,7 @@ GrB_Info GB_transpose_bucket // bucket transpose; typecast and apply op // Only requires a single int64 workspace of size vlen for a single // thread. The resulting C matrix is not jumbled. + GBURBLE ("(1-thread bucket transpose) ") ; // compute the row counts of A. No need to scan the A->p pointers ASSERT (nworkspaces == 1) ; @@ -210,6 +211,8 @@ GrB_Info GB_transpose_bucket // bucket transpose; typecast and apply op // typically faster than the non-atomic method. The resulting C matrix // is jumbled. + GBURBLE ("(%d-thread atomic bucket transpose) ", nthreads) ; + // compute the row counts of A. No need to scan the A->p pointers int64_t *restrict workspace = Workspaces [0] ; GB_memset (workspace, 0, (vlen + 1) * sizeof (int64_t), nth) ; @@ -244,6 +247,8 @@ GrB_Info GB_transpose_bucket // bucket transpose; typecast and apply op // resulting C matrix is not jumbled, so this can save work if C needs // to be unjumbled later. 
+ GBURBLE ("(%d-thread non-atomic bucket transpose) ", nthreads) ; + ASSERT (nworkspaces == nthreads) ; const int64_t *restrict Ap = A->p ; const int64_t *restrict Ah = A->h ; diff --git a/GraphBLAS/Source/GB_transpose_method.c b/GraphBLAS/Source/GB_transpose_method.c index cf1f2eed29..87de866fd0 100644 --- a/GraphBLAS/Source/GB_transpose_method.c +++ b/GraphBLAS/Source/GB_transpose_method.c @@ -44,9 +44,10 @@ bool GB_transpose_method // if true: use GB_builder, false: use bucket //-------------------------------------------------------------------------- bool atomics ; - if (nthreads == 1) + if (nthreads <= 2) { - // sequential bucket method, no atomics needed + // sequential bucket method: no atomics needed + // 2 threads: always use non-atomic method atomics = false ; } else if ((double) nthreads * (double) avlen > (double) anz) @@ -70,7 +71,7 @@ bool GB_transpose_method // if true: use GB_builder, false: use bucket { switch (anzlog) { - case 14: atol = -4 ; break ; // 16K entried in A + case 14: atol = -4 ; break ; // 16K entries in A case 15: atol = -3 ; break ; // 32K case 16: atol = -2 ; break ; // 64K case 17: atol = -1 ; break ; // 128K @@ -159,6 +160,7 @@ bool GB_transpose_method // if true: use GB_builder, false: use bucket // select the method with the least amount of work //-------------------------------------------------------------------------- - return (builder_work < bucket_work) ; + bool use_builder = (builder_work < bucket_work) ; + return (use_builder) ; } diff --git a/GraphBLAS/Source/GrB_Matrix_assign_scalar.c b/GraphBLAS/Source/GrB_Matrix_assign_scalar.c index 6418d98b5e..e4d8aa6936 100644 --- a/GraphBLAS/Source/GrB_Matrix_assign_scalar.c +++ b/GraphBLAS/Source/GrB_Matrix_assign_scalar.c @@ -106,6 +106,8 @@ GrB_Info GrB_Matrix_assign_Scalar // C(I,J) = accum (C(I,J),s) GB_RETURN_IF_NULL_OR_FAULTY (C) ; GB_RETURN_IF_NULL_OR_FAULTY (scalar) ; GB_RETURN_IF_FAULTY (M_in) ; + GB_RETURN_IF_NULL (I) ; + GB_RETURN_IF_NULL (J) ; // get the descriptor 
GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct, @@ -120,7 +122,30 @@ GrB_Info GrB_Matrix_assign_Scalar // C(I,J) = accum (C(I,J),s) GrB_Index nvals ; GB_OK (GB_nvals (&nvals, (GrB_Matrix) scalar, Context)) ; - if (nvals == 1) + + if (M == NULL && !Mask_comp && ni == 1 && nj == 1 && !C_replace) + { + + //---------------------------------------------------------------------- + // scalar assignment + //---------------------------------------------------------------------- + + const GrB_Index row = I [0] ; + const GrB_Index col = J [0] ; + if (nvals == 1) + { + // set the element: C(row,col) += scalar or C(row,col) = scalar + info = GB_setElement (C, accum, scalar->x, row, col, + scalar->type->code, Context) ; + } + else if (accum == NULL) + { + // delete the C(row,col) element + info = GB_Matrix_removeElement (C, row, col, Context) ; + } + + } + else if (nvals == 1) { //---------------------------------------------------------------------- @@ -129,7 +154,7 @@ GrB_Info GrB_Matrix_assign_Scalar // C(I,J) = accum (C(I,J),s) // This is identical to non-opaque scalar assignment - info = (GB_assign ( + info = GB_assign ( C, C_replace, // C matrix and its descriptor M, Mask_comp, Mask_struct, // mask matrix and its descriptor false, // do not transpose the mask @@ -141,7 +166,7 @@ GrB_Info GrB_Matrix_assign_Scalar // C(I,J) = accum (C(I,J),s) scalar->x, // scalar to assign, expands to become A scalar->type->code, // type code of scalar to expand GB_ASSIGN, - Context)) ; + Context) ; } else diff --git a/GraphBLAS/Source/GrB_Matrix_diag.c b/GraphBLAS/Source/GrB_Matrix_diag.c index 8d40d9e1d2..a246e86b4f 100644 --- a/GraphBLAS/Source/GrB_Matrix_diag.c +++ b/GraphBLAS/Source/GrB_Matrix_diag.c @@ -7,32 +7,38 @@ //------------------------------------------------------------------------------ -// Identical to GxB_Matrix_diag (C, v, k, NULL) +// Similar to GxB_Matrix_diag (C, v, k, NULL), except that C is constructed +// as a new matrix, like GrB_Matrix_new. 
C has the same type as v. #include "GB_diag.h" -GrB_Info GrB_Matrix_diag // construct a diagonal matrix from a vector +GrB_Info GrB_Matrix_diag // construct a diagonal matrix from a vector ( - GrB_Matrix C, // output matrix - const GrB_Vector v, // input vector + GrB_Matrix *C, // output matrix + const GrB_Vector v, // input vector int64_t k ) -{ +{ //-------------------------------------------------------------------------- // check inputs //-------------------------------------------------------------------------- - GB_WHERE (C, "GrB_Matrix_diag (C, v, k)") ; + GB_WHERE1 ("GrB_Matrix_diag (&C, v, k)") ; GB_BURBLE_START ("GrB_Matrix_diag") ; - GB_RETURN_IF_NULL_OR_FAULTY (C) ; GB_RETURN_IF_NULL_OR_FAULTY (v) ; //-------------------------------------------------------------------------- - // C = diag (v,0) + // C = diag (v,k) //-------------------------------------------------------------------------- - GrB_Info info = GB_Matrix_diag (C, (GrB_Matrix) v, k, Context) ; + GrB_Index n = v->vlen + GB_IABS (k) ; + GrB_Info info = GB_Matrix_new (C, v->type, n, n, Context) ; + if (info == GrB_SUCCESS) + { + info = GB_Matrix_diag (*C, (GrB_Matrix) v, k, Context) ; + } + GB_BURBLE_END ; return (info) ; } diff --git a/GraphBLAS/Source/GrB_Matrix_new.c b/GraphBLAS/Source/GrB_Matrix_new.c index cb891ece03..f39f7f9142 100644 --- a/GraphBLAS/Source/GrB_Matrix_new.c +++ b/GraphBLAS/Source/GrB_Matrix_new.c @@ -7,10 +7,7 @@ //------------------------------------------------------------------------------ -// The new matrix is nrows-by-ncols, with no entries in it. Default format for -// an empty matrix is hypersparse CSC: A->p is size 2 and all zero, A->h is -// size 1, A->plen is 1, and contents A->x and A->i are NULL. If this method -// fails, *A is set to NULL. +// The new matrix is nrows-by-ncols, with no entries in it. 
#include "GB.h" @@ -22,60 +19,7 @@ GrB_Info GrB_Matrix_new // create a new matrix with no entries GrB_Index ncols ) { - - //-------------------------------------------------------------------------- - // check inputs - //-------------------------------------------------------------------------- - GB_WHERE1 ("GrB_Matrix_new (&A, type, nrows, ncols)") ; - GB_RETURN_IF_NULL (A) ; - (*A) = NULL ; - GB_RETURN_IF_NULL_OR_FAULTY (type) ; - - if (nrows > GB_NMAX || ncols > GB_NMAX) - { - // problem too large - return (GrB_INVALID_VALUE) ; - } - - //-------------------------------------------------------------------------- - // create the matrix - //-------------------------------------------------------------------------- - - GrB_Info info ; - int64_t vlen, vdim ; - - bool A_is_csc ; - if (ncols == 1) - { - // n-by-1 matrices are always held by column, including 1-by-1 - A_is_csc = true ; - } - else if (nrows == 1) - { - // 1-by-n matrices (except 1-by-1) are always held by row - A_is_csc = false ; - } - else - { - // m-by-n (including 0-by-0) with m != and n != use the global setting - A_is_csc = GB_Global_is_csc_get ( ) ; - } - - if (A_is_csc) - { - vlen = (int64_t) nrows ; - vdim = (int64_t) ncols ; - } - else - { - vlen = (int64_t) ncols ; - vdim = (int64_t) nrows ; - } - - info = GB_new (A, // auto sparsity, new header - type, vlen, vdim, GB_Ap_calloc, A_is_csc, GxB_AUTO_SPARSITY, - GB_Global_hyper_switch_get ( ), 1, Context) ; - return (info) ; + return (GB_Matrix_new (A, type, nrows, ncols, Context)) ; } diff --git a/GraphBLAS/Source/GrB_Matrix_setElement.c b/GraphBLAS/Source/GrB_Matrix_setElement.c index 09881a158d..187f313c0c 100644 --- a/GraphBLAS/Source/GrB_Matrix_setElement.c +++ b/GraphBLAS/Source/GrB_Matrix_setElement.c @@ -24,8 +24,8 @@ GrB_Info GB_EVAL3 (prefix, _Matrix_setElement_, T) /* C (row,col) = x */ \ GB_WHERE (C, GB_STR(prefix) "_Matrix_setElement_" GB_STR(T) \ " (C, row, col, x)") ; \ GB_RETURN_IF_NULL_OR_FAULTY (C) ; \ - return (GB_setElement (C, 
ampersand x, row, col, GB_ ## T ## _code, \ - Context)) ; \ + return (GB_setElement (C, NULL, ampersand x, row, col, \ + GB_ ## T ## _code, Context)) ; \ } GB_SET (GrB, bool , BOOL , &) @@ -75,8 +75,8 @@ GrB_Info GrB_Matrix_setElement_Scalar if (GB_nnz ((GrB_Matrix) scalar) > 0) { // set the element: C(row,col) = scalar - return (GB_setElement (C, scalar->x, row, col, scalar->type->code, - Context)) ; + return (GB_setElement (C, NULL, scalar->x, row, col, + scalar->type->code, Context)) ; } else { diff --git a/GraphBLAS/Source/GrB_Scalar_setElement.c b/GraphBLAS/Source/GrB_Scalar_setElement.c index c379515bdb..fb47408b22 100644 --- a/GraphBLAS/Source/GrB_Scalar_setElement.c +++ b/GraphBLAS/Source/GrB_Scalar_setElement.c @@ -22,7 +22,7 @@ GrB_Info GB_EVAL2 (GRB (Scalar_setElement_), T) /* s = x */ \ GB_WHERE (s, "GrB_Scalar_setElement_" GB_STR(T) " (w, x)") ; \ GB_RETURN_IF_NULL_OR_FAULTY (s) ; \ ASSERT (GB_SCALAR_OK (s)) ; \ - return (GB_setElement ((GrB_Matrix) s, ampersand x, 0, 0, \ + return (GB_setElement ((GrB_Matrix) s, NULL, ampersand x, 0, 0, \ GB_ ## T ## _code, Context)) ; \ } @@ -53,7 +53,7 @@ GrB_Info GB_EVAL2 (GXB (Scalar_setElement_), T) /* s = x */ \ GB_WHERE (s, "GxB_Scalar_setElement_" GB_STR(T) " (w, x)") ; \ GB_RETURN_IF_NULL_OR_FAULTY (s) ; \ ASSERT (GB_SCALAR_OK (s)) ; \ - return (GB_setElement ((GrB_Matrix) s, ampersand x, 0, 0, \ + return (GB_setElement ((GrB_Matrix) s, NULL, ampersand x, 0, 0, \ GB_ ## T ## _code, Context)) ; \ } diff --git a/GraphBLAS/Source/GrB_Vector_assign_scalar.c b/GraphBLAS/Source/GrB_Vector_assign_scalar.c index aeeefe69ad..15c59713b2 100644 --- a/GraphBLAS/Source/GrB_Vector_assign_scalar.c +++ b/GraphBLAS/Source/GrB_Vector_assign_scalar.c @@ -97,6 +97,7 @@ GrB_Info GrB_Vector_assign_Scalar // w(I) = accum (w(I),s) GB_RETURN_IF_NULL_OR_FAULTY (w) ; GB_RETURN_IF_NULL_OR_FAULTY (scalar) ; GB_RETURN_IF_FAULTY (M_in) ; + GB_RETURN_IF_NULL (I) ; ASSERT (GB_VECTOR_OK (w)) ; ASSERT (M_in == NULL || GB_VECTOR_OK (M_in)) ; @@ 
-113,7 +114,29 @@ GrB_Info GrB_Vector_assign_Scalar // w(I) = accum (w(I),s) GrB_Index nvals ; GB_OK (GB_nvals (&nvals, (GrB_Matrix) scalar, Context)) ; - if (nvals == 1) + + if (M == NULL && !Mask_comp && ni == 1 && !C_replace) + { + + //---------------------------------------------------------------------- + // scalar assignment + //---------------------------------------------------------------------- + + const GrB_Index row = I [0] ; + if (nvals == 1) + { + // set the element: w(row) += scalar or w(row) = scalar + info = GB_setElement ((GrB_Matrix) w, accum, scalar->x, row, 0, + scalar->type->code, Context) ; + } + else if (accum == NULL) + { + // delete the w(row) element + info = GB_Vector_removeElement (w, row, Context) ; + } + + } + else if (nvals == 1) { //---------------------------------------------------------------------- @@ -122,7 +145,7 @@ GrB_Info GrB_Vector_assign_Scalar // w(I) = accum (w(I),s) // This is identical to non-opaque scalar subassignment - info = (GB_assign ( + info = GB_assign ( (GrB_Matrix) w, C_replace, // w vector and its descriptor M, Mask_comp, Mask_struct, // mask vector and its descriptor false, // do not transpose the mask @@ -134,7 +157,7 @@ GrB_Info GrB_Vector_assign_Scalar // w(I) = accum (w(I),s) scalar->x, // scalar to assign, expands to become u scalar->type->code, // type code of scalar to expand GB_ASSIGN, - Context)) ; + Context) ; } else diff --git a/GraphBLAS/Source/GrB_Vector_setElement.c b/GraphBLAS/Source/GrB_Vector_setElement.c index b8c9de3685..d9b09c7d01 100644 --- a/GraphBLAS/Source/GrB_Vector_setElement.c +++ b/GraphBLAS/Source/GrB_Vector_setElement.c @@ -23,7 +23,7 @@ GrB_Info GB_EVAL3 (prefix, _Vector_setElement_, T) /* w(row) = x */ \ GB_WHERE (w, "GrB_Vector_setElement_" GB_STR(T) " (w, x, row)") ; \ GB_RETURN_IF_NULL_OR_FAULTY (w) ; \ ASSERT (GB_VECTOR_OK (w)) ; \ - return (GB_setElement ((GrB_Matrix) w, ampersand x, row, 0, \ + return (GB_setElement ((GrB_Matrix) w, NULL, ampersand x, row, 0, \ GB_ ## 
T ## _code, Context)) ; \ } @@ -74,7 +74,7 @@ GrB_Info GrB_Vector_setElement_Scalar if (GB_nnz ((GrB_Matrix) scalar) > 0) { // set the element: w(row) = scalar - return (GB_setElement ((GrB_Matrix) w, scalar->x, row, 0, + return (GB_setElement ((GrB_Matrix) w, NULL, scalar->x, row, 0, scalar->type->code, Context)) ; } else diff --git a/GraphBLAS/Source/GxB_Matrix_diag.c b/GraphBLAS/Source/GxB_Matrix_diag.c index 55bafcbcfb..fef4788748 100644 --- a/GraphBLAS/Source/GxB_Matrix_diag.c +++ b/GraphBLAS/Source/GxB_Matrix_diag.c @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// GxB_Matrix_diag: construct a diagonal matrix from a vector +// GxB_Matrix_diag: build a diagonal matrix from a vector //------------------------------------------------------------------------------ // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved. @@ -9,12 +9,12 @@ #include "GB_diag.h" -GrB_Info GxB_Matrix_diag // construct a diagonal matrix from a vector +GrB_Info GxB_Matrix_diag // build a diagonal matrix from a vector ( - GrB_Matrix C, // output matrix - const GrB_Vector v, // input vector + GrB_Matrix C, // output matrix + const GrB_Vector v, // input vector int64_t k, - const GrB_Descriptor desc // unused, except threading control + const GrB_Descriptor desc // unused, except threading control ) { @@ -27,6 +27,26 @@ GrB_Info GxB_Matrix_diag // construct a diagonal matrix from a vector GB_RETURN_IF_NULL_OR_FAULTY (C) ; GB_RETURN_IF_NULL_OR_FAULTY (v) ; + GrB_Type ctype = C->type ; + GrB_Type vtype = v->type ; + int64_t nrows = GB_NROWS (C) ; + int64_t ncols = GB_NCOLS (C) ; + int64_t n = v->vlen + GB_IABS (k) ; // C must be n-by-n + + if (nrows != ncols || nrows != n) + { + GB_ERROR (GrB_DIMENSION_MISMATCH, + "Input matrix is " GBd "-by-" GBd " but must be " + GBd "-by-" GBd "\n", nrows, ncols, n, n) ; + } + + if (!GB_Type_compatible (ctype, vtype)) + { + GB_ERROR (GrB_DOMAIN_MISMATCH, "Input vector of type [%s] " 
+ "cannot be typecast to output of type [%s]\n", + vtype->name, ctype->name) ; + } + // get the descriptor GB_GET_DESCRIPTOR (info, desc, xx1, xx2, xx3, xx4, xx5, xx6, xx7) ; diff --git a/GraphBLAS/Source/GxB_Matrix_subassign_scalar.c b/GraphBLAS/Source/GxB_Matrix_subassign_scalar.c index 77c77e8074..46a2ce9d3b 100644 --- a/GraphBLAS/Source/GxB_Matrix_subassign_scalar.c +++ b/GraphBLAS/Source/GxB_Matrix_subassign_scalar.c @@ -104,6 +104,8 @@ GrB_Info GxB_Matrix_subassign_Scalar // C(I,J) = accum (C(I,J),s) GB_RETURN_IF_NULL_OR_FAULTY (C) ; GB_RETURN_IF_NULL_OR_FAULTY (scalar) ; GB_RETURN_IF_FAULTY (M_in) ; + GB_RETURN_IF_NULL (I) ; + GB_RETURN_IF_NULL (J) ; // get the descriptor GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct, @@ -118,7 +120,30 @@ GrB_Info GxB_Matrix_subassign_Scalar // C(I,J) = accum (C(I,J),s) GrB_Index nvals ; GB_OK (GB_nvals (&nvals, (GrB_Matrix) scalar, Context)) ; - if (nvals == 1) + + if (M == NULL && !Mask_comp && ni == 1 && nj == 1 && !C_replace) + { + + //---------------------------------------------------------------------- + // scalar assignment + //---------------------------------------------------------------------- + + const GrB_Index row = I [0] ; + const GrB_Index col = J [0] ; + if (nvals == 1) + { + // set the element: C(row,col) += scalar or C(row,col) = scalar + info = GB_setElement (C, accum, scalar->x, row, col, + scalar->type->code, Context) ; + } + else if (accum == NULL) + { + // delete the C(row,col) element + info = GB_Matrix_removeElement (C, row, col, Context) ; + } + + } + else if (nvals == 1) { //---------------------------------------------------------------------- @@ -127,7 +152,7 @@ GrB_Info GxB_Matrix_subassign_Scalar // C(I,J) = accum (C(I,J),s) // This is identical to non-opaque scalar subassignment - info = (GB_subassign ( + info = GB_subassign ( C, C_replace, // C matrix and its descriptor M, Mask_comp, Mask_struct, // mask matrix and its descriptor false, // do not transpose the mask @@ 
-138,7 +163,7 @@ GrB_Info GxB_Matrix_subassign_Scalar // C(I,J) = accum (C(I,J),s) true, // do scalar expansion scalar->x, // scalar to assign, expands to become A scalar->type->code, // type code of scalar to expand - Context)) ; + Context) ; } else diff --git a/GraphBLAS/Source/GxB_Vector_Iterator.c b/GraphBLAS/Source/GxB_Vector_Iterator.c index 5f206c98e7..fde036ce20 100644 --- a/GraphBLAS/Source/GxB_Vector_Iterator.c +++ b/GraphBLAS/Source/GxB_Vector_Iterator.c @@ -28,7 +28,11 @@ GrB_Info GxB_Vector_Iterator_attach desc)) ; } -GrB_Info GB_Vector_Iterator_bitmap_seek (GxB_Iterator iterator, GrB_Index p) +GrB_Info GB_Vector_Iterator_bitmap_seek +( + GxB_Iterator iterator, + GrB_Index unused // note: unused parameter to be removed in v8.x +) { for ( ; iterator->p < iterator->pmax ; iterator->p++) { diff --git a/GraphBLAS/Source/GxB_Vector_subassign_scalar.c b/GraphBLAS/Source/GxB_Vector_subassign_scalar.c index 6d9864ecc2..8e75579c4f 100644 --- a/GraphBLAS/Source/GxB_Vector_subassign_scalar.c +++ b/GraphBLAS/Source/GxB_Vector_subassign_scalar.c @@ -37,9 +37,9 @@ GrB_Info GB_EVAL2 (GXB (Vector_subassign_), T) /* w(I) = accum (w(I),x) */ \ GB_RETURN_IF_FAULTY (M) ; \ ASSERT (GB_VECTOR_OK (w)) ; \ ASSERT (GB_IMPLIES (M != NULL, GB_VECTOR_OK (M))) ; \ - GrB_Info info = (GB_subassign_scalar ((GrB_Matrix) w, (GrB_Matrix) M, \ + GrB_Info info = GB_subassign_scalar ((GrB_Matrix) w, (GrB_Matrix) M, \ accum, ampersand x, GB_## T ## _code, Rows, nRows, GrB_ALL, 1, desc, \ - Context)) ; \ + Context) ; \ GB_BURBLE_END ; \ return (info) ; \ } @@ -99,6 +99,7 @@ GrB_Info GxB_Vector_subassign_Scalar // w(I) = accum (w(I),s) GB_RETURN_IF_NULL_OR_FAULTY (w) ; GB_RETURN_IF_NULL_OR_FAULTY (scalar) ; GB_RETURN_IF_FAULTY (M_in) ; + GB_RETURN_IF_NULL (I) ; ASSERT (GB_VECTOR_OK (w)) ; ASSERT (M_in == NULL || GB_VECTOR_OK (M_in)) ; @@ -115,7 +116,29 @@ GrB_Info GxB_Vector_subassign_Scalar // w(I) = accum (w(I),s) GrB_Index nvals ; GB_OK (GB_nvals (&nvals, (GrB_Matrix) scalar, Context)) ; - 
if (nvals == 1) + + if (M == NULL && !Mask_comp && ni == 1 && !C_replace) + { + + //---------------------------------------------------------------------- + // scalar assignment + //---------------------------------------------------------------------- + + const GrB_Index row = I [0] ; + if (nvals == 1) + { + // set the element: w(row) += scalar or w(row) = scalar + info = GB_setElement ((GrB_Matrix) w, accum, scalar->x, row, 0, + scalar->type->code, Context) ; + } + else if (accum == NULL) + { + // delete the w(row) element + info = GB_Vector_removeElement (w, row, Context) ; + } + + } + else if (nvals == 1) { //---------------------------------------------------------------------- @@ -124,7 +147,7 @@ GrB_Info GxB_Vector_subassign_Scalar // w(I) = accum (w(I),s) // This is identical to non-opaque scalar assignment - info = (GB_subassign ( + info = GB_subassign ( (GrB_Matrix) w, C_replace, // w vector and its descriptor M, Mask_comp, Mask_struct, // mask vector and its descriptor false, // do not transpose the mask @@ -135,7 +158,7 @@ GrB_Info GxB_Vector_subassign_Scalar // w(I) = accum (w(I),s) true, // do scalar expansion scalar->x, // scalar to assign, expands to become u scalar->type->code, // type code of scalar to expand - Context)) ; + Context) ; } else diff --git a/GraphBLAS/Tcov/log_Apr8_2022.txt b/GraphBLAS/Tcov/log_Apr8_2022.txt new file mode 100644 index 0000000000..dfad223721 --- /dev/null +++ b/GraphBLAS/Tcov/log_Apr8_2022.txt @@ -0,0 +1,144 @@ + +---------------------------------------------- [malloc] [cover] +08-Apr 15:28:34 test243 20.5 sec 206: 19624 of 19830 1.0% 10.06/sec +08-Apr 15:29:15 test242 41.2 sec 310: 19314 of 19830 2.6% 7.52/sec +08-Apr 15:29:15 test241 0.3 sec 144: 19170 of 19830 3.3% 540.39/sec +08-Apr 15:30:48 testca 92.1 sec 496: 18674 of 19830 5.8% 5.38/sec +08-Apr 15:30:48 test240 0.3 sec 19: 18655 of 19830 5.9% 61.23/sec +08-Apr 15:30:48 test240 0.2 sec 5: 18650 of 19830 6.0% 22.53/sec +08-Apr 15:32:36 testca 107.8 sec 12:
18638 of 19830 6.0% 0.11/sec +08-Apr 15:32:52 test238 16.2 sec 161: 18477 of 19830 6.8% 9.95/sec +08-Apr 15:32:53 test237 1.2 sec 3: 18474 of 19830 6.8% 2.55/sec +08-Apr 15:32:57 test236 3.7 sec 109: 18365 of 19830 7.4% 29.13/sec +08-Apr 15:33:03 test192 5.8 sec 46: 18319 of 19830 7.6% 7.93/sec +08-Apr 15:33:16 test191 12.9 sec 63: 18256 of 19830 7.9% 4.90/sec +08-Apr 15:34:09 test188 53.3 sec 348: 17908 of 19830 9.7% 6.52/sec +08-Apr 15:34:18 test187 9.1 sec 20: 17888 of 19830 9.8% 2.20/sec +08-Apr 15:34:19 test186 0.4 sec 50: 17838 of 19830 10.0% 112.03/sec +08-Apr 15:34:19 test186 0.4 sec 10: 17828 of 19830 10.1% 27.04/sec +08-Apr 15:36:10 test185 110.9 sec 40: 17788 of 19830 10.3% 0.36/sec +08-Apr 15:36:13 test184 3.3 sec 51: 17737 of 19830 10.6% 15.37/sec +08-Apr 15:36:35 test181 21.4 sec 102: 17635 of 19830 11.1% 4.77/sec +08-Apr 15:36:43 test180 8.6 sec 213: 17422 of 19830 12.1% 24.67/sec +08-Apr 15:37:00 test180 16.8 sec 11: 17411 of 19830 12.2% 0.65/sec +08-Apr 15:37:00 test150 0.1 sec 41: 17370 of 19830 12.4% 371.60/sec +08-Apr 15:37:12 test14 12.1 sec 710: 16660 of 19830 16.0% 58.64/sec +08-Apr 15:39:02 test154 109.4 sec 1906: 14754 of 19830 25.6% 17.42/sec +08-Apr 15:39:32 test151b 30.5 sec 235: 14519 of 19830 26.8% 7.71/sec +08-Apr 15:39:32 test239 0.0 sec 14: 14505 of 19830 26.9% 506.46/sec +08-Apr 15:48:14 test74 521.9 sec 5982: 8523 of 19830 57.0% 11.46/sec +08-Apr 15:48:14 test235 0.0 sec 4: 8519 of 19830 57.0% 99.49/sec +08-Apr 15:49:41 test234 87.0 sec 454: 8065 of 19830 59.3% 5.22/sec +08-Apr 15:49:49 test233 7.9 sec 3: 8062 of 19830 59.3% 0.38/sec +08-Apr 15:49:51 test232 2.2 sec 57: 8005 of 19830 59.6% 25.97/sec +08-Apr 15:56:57 test231 426.0 sec 701: 7304 of 19830 63.2% 1.65/sec +08-Apr 15:58:25 test230 88.0 sec 118: 7186 of 19830 63.8% 1.34/sec +08-Apr 15:58:26 test229 0.7 sec 8: 7178 of 19830 63.8% 11.79/sec +08-Apr 15:58:32 test228 5.8 sec 46: 7132 of 19830 64.0% 7.99/sec +08-Apr 15:58:38 test227 5.8 sec 38: 7094 of 19830 64.2% 6.56/sec 
+08-Apr 15:58:38 test226 0.0 sec 6: 7088 of 19830 64.3% 476.23/sec +08-Apr 15:58:38 test225 0.2 sec 4: 7084 of 19830 64.3% 26.55/sec +08-Apr 15:58:43 test224 4.8 sec 73: 7011 of 19830 64.6% 15.36/sec +08-Apr 15:58:43 test223 0.0 sec 2: 7009 of 19830 64.7% 68.31/sec +08-Apr 15:58:43 test222 0.1 sec 11: 6998 of 19830 64.7% 143.80/sec +08-Apr 15:58:43 test221 0.0 sec 2: 6996 of 19830 64.7% 236.49/sec +08-Apr 15:58:43 test220 0.0 sec 5: 6991 of 19830 64.7% 152.66/sec +08-Apr 15:58:43 test219 0.0 sec 4: 6987 of 19830 64.8% 552.79/sec +08-Apr 15:58:43 test217 0.0 sec 4: 6983 of 19830 64.8% 306.09/sec +08-Apr 15:58:43 test216 0.1 sec 11: 6972 of 19830 64.8% 133.54/sec +08-Apr 15:58:45 test215 2.5 sec 1: 6971 of 19830 64.8% 0.39/sec +08-Apr 15:58:45 test214 0.0 sec 1: 6970 of 19830 64.9% 139.14/sec +08-Apr 15:58:45 test213 0.0 sec 5: 6965 of 19830 64.9% 778.33/sec +08-Apr 15:58:45 test212 0.1 sec 4: 6961 of 19830 64.9% 47.81/sec +08-Apr 15:58:45 test211 0.0 sec 12: 6949 of 19830 65.0% 581.87/sec +08-Apr 15:58:45 test210 0.0 sec 2: 6947 of 19830 65.0% 689.89/sec +08-Apr 15:58:51 test209 5.3 sec 24: 6923 of 19830 65.1% 4.55/sec +08-Apr 15:58:51 test208 0.0 sec 5: 6918 of 19830 65.1% 246.62/sec +08-Apr 15:58:51 test207 0.1 sec 8: 6910 of 19830 65.2% 108.95/sec +08-Apr 15:58:54 test206 2.8 sec 12: 6898 of 19830 65.2% 4.28/sec +08-Apr 15:58:54 test204 0.1 sec 10: 6888 of 19830 65.3% 74.71/sec +08-Apr 15:58:54 test203 0.0 sec 5: 6883 of 19830 65.3% 1156.34/sec +08-Apr 15:58:54 test202 0.0 sec 8: 6875 of 19830 65.3% 918.91/sec +08-Apr 15:58:54 test201 0.0 sec 7: 6868 of 19830 65.4% 967.65/sec +08-Apr 15:58:57 test200 3.1 sec 7: 6861 of 19830 65.4% 2.29/sec +08-Apr 15:58:57 test199 0.0 sec 1: 6860 of 19830 65.4% 197.94/sec +08-Apr 15:58:57 test198 0.1 sec 4: 6856 of 19830 65.4% 56.70/sec +08-Apr 15:58:58 test197 0.7 sec 1: 6855 of 19830 65.4% 1.40/sec +08-Apr 15:59:00 test196 2.3 sec 15: 6840 of 19830 65.5% 6.50/sec +08-Apr 16:01:20 test195 139.9 sec 79: 6761 of 19830 65.9% 
0.56/sec +08-Apr 16:02:24 test194 64.5 sec 124: 6637 of 19830 66.5% 1.92/sec +08-Apr 16:02:43 test193 18.2 sec 6: 6631 of 19830 66.6% 0.33/sec +08-Apr 16:02:51 test189 8.1 sec 10: 6621 of 19830 66.6% 1.24/sec +08-Apr 16:02:51 test183 0.0 sec 4: 6617 of 19830 66.6% 154.36/sec +08-Apr 16:02:52 test182 0.9 sec 9: 6608 of 19830 66.7% 10.15/sec +08-Apr 16:02:52 test179 0.1 sec 18: 6590 of 19830 66.8% 309.06/sec +08-Apr 16:02:52 test165 0.0 sec 3: 6587 of 19830 66.8% 516.80/sec +08-Apr 16:02:53 test01 0.9 sec 751: 5836 of 19830 70.6% 827.13/sec +08-Apr 16:02:53 test83 0.0 sec 1: 5835 of 19830 70.6% 277.78/sec +08-Apr 16:02:53 test176 0.2 sec 7: 5828 of 19830 70.6% 40.04/sec +08-Apr 16:02:53 test174 0.0 sec 9: 5819 of 19830 70.7% 320.91/sec +08-Apr 16:02:53 test170 0.1 sec 1: 5818 of 19830 70.7% 13.87/sec +08-Apr 16:02:54 test152 0.8 sec 405: 5413 of 19830 72.7% 536.46/sec +08-Apr 16:02:54 test155 0.1 sec 13: 5400 of 19830 72.8% 99.85/sec +08-Apr 16:02:55 test156 0.7 sec 2: 5398 of 19830 72.8% 2.76/sec +08-Apr 16:02:55 test136 0.0 sec 21: 5377 of 19830 72.9% 466.47/sec +08-Apr 16:02:55 test02 0.2 sec 133: 5244 of 19830 73.6% 618.52/sec +08-Apr 16:02:55 test109 0.1 sec 2: 5242 of 19830 73.6% 25.43/sec +08-Apr 16:02:55 test109 0.0 sec 1: 5241 of 19830 73.6% 401.93/sec +08-Apr 16:02:55 test04 0.0 sec 8: 5233 of 19830 73.6% 304.23/sec +08-Apr 16:03:03 test142 7.8 sec 627: 4606 of 19830 76.8% 80.20/sec +08-Apr 16:03:03 test162 0.1 sec 1: 4605 of 19830 76.8% 18.55/sec +08-Apr 16:03:03 test161 0.1 sec 1: 4604 of 19830 76.8% 9.92/sec +08-Apr 16:03:04 test159 1.4 sec 23: 4581 of 19830 76.9% 16.08/sec +08-Apr 16:03:05 test137 0.2 sec 10: 4571 of 19830 76.9% 59.44/sec +08-Apr 16:03:05 test139 0.4 sec 2: 4569 of 19830 77.0% 4.55/sec +08-Apr 16:03:05 test09 0.0 sec 1: 4568 of 19830 77.0% 86.72/sec +08-Apr 16:03:05 test132 0.0 sec 1: 4567 of 19830 77.0% 49.89/sec +08-Apr 16:03:09 test141 3.9 sec 110: 4457 of 19830 77.5% 27.98/sec +08-Apr 16:03:10 test144 0.6 sec 1: 4456 of 19830 77.5% 
1.78/sec +08-Apr 16:03:10 test145 0.2 sec 5: 4451 of 19830 77.6% 30.90/sec +08-Apr 16:03:10 test92 0.1 sec 4: 4447 of 19830 77.6% 55.88/sec +08-Apr 16:03:10 test108 0.3 sec 2: 4445 of 19830 77.6% 7.18/sec +08-Apr 16:03:10 test172 0.1 sec 3: 4442 of 19830 77.6% 38.83/sec +08-Apr 16:03:11 test148 0.4 sec 7: 4435 of 19830 77.6% 16.78/sec +08-Apr 16:03:11 testc2(1) 0.3 sec 6: 4429 of 19830 77.7% 17.21/sec +08-Apr 16:03:12 test173 1.5 sec 11: 4418 of 19830 77.7% 7.34/sec +08-Apr 16:03:13 test157 0.7 sec 13: 4405 of 19830 77.8% 19.85/sec +08-Apr 16:03:20 test29 6.4 sec 3: 4402 of 19830 77.8% 0.47/sec +08-Apr 16:03:20 test128 0.3 sec 15: 4387 of 19830 77.9% 56.89/sec +08-Apr 16:03:50 test125 30.6 sec 639: 3748 of 19830 81.1% 20.90/sec +08-Apr 16:03:51 test82 0.1 sec 5: 3743 of 19830 81.1% 59.26/sec +08-Apr 16:04:08 test158 17.0 sec 19: 3724 of 19830 81.2% 1.12/sec +08-Apr 16:04:10 test84 2.3 sec 19: 3705 of 19830 81.3% 8.24/sec +08-Apr 16:04:11 test130 1.3 sec 18: 3687 of 19830 81.4% 14.08/sec +08-Apr 16:04:21 test19b 9.9 sec 44: 3643 of 19830 81.6% 4.43/sec +08-Apr 16:04:26 test19b 5.4 sec 5: 3638 of 19830 81.7% 0.93/sec +08-Apr 16:04:27 test133 0.5 sec 2: 3636 of 19830 81.7% 4.32/sec +08-Apr 16:04:30 test80 3.5 sec 2: 3634 of 19830 81.7% 0.57/sec +08-Apr 16:04:52 test151 21.7 sec 74: 3560 of 19830 82.0% 3.41/sec +08-Apr 16:04:52 test124 0.2 sec 3: 3557 of 19830 82.1% 14.50/sec +08-Apr 16:05:08 test23 15.3 sec 88: 3469 of 19830 82.5% 5.76/sec +08-Apr 16:05:18 test175 9.9 sec 1: 3468 of 19830 82.5% 0.10/sec +08-Apr 16:06:27 test160 69.7 sec 16: 3452 of 19830 82.6% 0.23/sec +08-Apr 16:07:37 test160 69.6 sec 3: 3449 of 19830 82.6% 0.04/sec +08-Apr 16:07:49 test54 11.9 sec 20: 3429 of 19830 82.7% 1.68/sec +08-Apr 16:08:06 test104 17.3 sec 38: 3391 of 19830 82.9% 2.19/sec +08-Apr 16:08:10 test11 3.4 sec 3: 3388 of 19830 82.9% 0.88/sec +08-Apr 16:08:11 test129 1.4 sec 1: 3387 of 19830 82.9% 0.73/sec +08-Apr 16:08:11 test138 0.1 sec 1: 3386 of 19830 82.9% 9.25/sec +08-Apr 
16:13:21 test127 310.1 sec 1613: 1773 of 19830 91.1% 5.20/sec +08-Apr 16:13:36 test76 15.3 sec 15: 1758 of 19830 91.1% 0.98/sec +08-Apr 16:13:38 test107 1.6 sec 3: 1755 of 19830 91.1% 1.91/sec +08-Apr 16:13:44 test69 6.0 sec 2: 1753 of 19830 91.2% 0.33/sec +08-Apr 16:13:46 test135 1.9 sec 4: 1749 of 19830 91.2% 2.08/sec +08-Apr 16:14:19 test17 33.4 sec 29: 1720 of 19830 91.3% 0.87/sec +08-Apr 16:14:48 test53 28.8 sec 4: 1716 of 19830 91.3% 0.14/sec +08-Apr 16:18:13 test19 204.4 sec 11: 1705 of 19830 91.4% 0.05/sec +[malloc debugging turned off] +08-Apr 16:30:34 test10 741.4 sec 784: 921 of 19830 95.4% 1.06/sec +08-Apr 16:38:22 test75b 467.9 sec 870: 51 of 19830 99.7% 1.86/sec +08-Apr 16:41:19 test16 177.5 sec 8: 43 of 19830 99.8% 0.05/sec +08-Apr 16:42:59 test81 99.3 sec 6: 37 of 19830 99.8% 0.06/sec +08-Apr 16:44:23 test21b 84.7 sec 21: 16 of 19830 99.9% 0.25/sec +08-Apr 16:50:29 test18 365.6 sec 16: all 19830 full 100% 0.04/sec +[malloc debugging turned back on] diff --git a/GraphBLAS/Tcov/log_Feb28_2022.txt b/GraphBLAS/Tcov/log_Feb28_2022.txt deleted file mode 100644 index 33314b0abc..0000000000 --- a/GraphBLAS/Tcov/log_Feb28_2022.txt +++ /dev/null @@ -1,145 +0,0 @@ - ----------------------------------------------- [malloc] [cover] -28-Feb 15:53:13 test243 18.1 sec 206: 19612 of 19818 1.0% 11.40/sec -28-Feb 15:53:48 test242 35.0 sec 309: 19303 of 19818 2.6% 8.84/sec -28-Feb 15:53:48 test241 0.2 sec 144: 19159 of 19818 3.3% 638.16/sec -28-Feb 15:55:15 testca 87.3 sec 497: 18662 of 19818 5.8% 5.69/sec -28-Feb 15:55:16 test240 0.3 sec 19: 18643 of 19818 5.9% 59.19/sec -28-Feb 15:55:16 test240 0.2 sec 5: 18638 of 19818 6.0% 23.69/sec -28-Feb 15:56:58 testca 102.0 sec 12: 18626 of 19818 6.0% 0.12/sec -28-Feb 15:57:10 test238 11.6 sec 161: 18465 of 19818 6.8% 13.85/sec -28-Feb 15:57:10 test237 0.9 sec 3: 18462 of 19818 6.8% 3.36/sec -28-Feb 15:57:13 test236 2.3 sec 108: 18354 of 19818 7.4% 47.92/sec -28-Feb 15:57:18 test192 5.3 sec 46: 18308 of 19818 7.6% 8.66/sec 
-28-Feb 15:57:30 test191 11.5 sec 64: 18244 of 19818 7.9% 5.55/sec -28-Feb 15:58:20 test188 50.5 sec 348: 17896 of 19818 9.7% 6.89/sec -28-Feb 15:58:29 test187 8.6 sec 20: 17876 of 19818 9.8% 2.34/sec -28-Feb 15:58:29 test186 0.4 sec 50: 17826 of 19818 10.1% 116.63/sec -28-Feb 15:58:29 test186 0.3 sec 10: 17816 of 19818 10.1% 28.83/sec -28-Feb 15:59:54 test185 84.9 sec 40: 17776 of 19818 10.3% 0.47/sec -28-Feb 15:59:59 test184 4.3 sec 51: 17725 of 19818 10.6% 11.81/sec -28-Feb 16:00:18 test181 19.3 sec 102: 17623 of 19818 11.1% 5.28/sec -28-Feb 16:00:26 test180 7.8 sec 213: 17410 of 19818 12.2% 27.44/sec -28-Feb 16:00:47 test180 20.8 sec 11: 17399 of 19818 12.2% 0.53/sec -28-Feb 16:00:47 test150 0.2 sec 41: 17358 of 19818 12.4% 260.93/sec -28-Feb 16:01:01 test14 14.0 sec 710: 16648 of 19818 16.0% 50.66/sec -28-Feb 16:02:50 test154 109.6 sec 1906: 14742 of 19818 25.6% 17.39/sec -28-Feb 16:03:20 test151b 29.3 sec 235: 14507 of 19818 26.8% 8.02/sec -28-Feb 16:03:20 test239 0.0 sec 14: 14493 of 19818 26.9% 345.98/sec -28-Feb 16:17:49 test74 869.3 sec 5982: 8511 of 19818 57.1% 6.88/sec -28-Feb 16:17:49 test235 0.2 sec 4: 8507 of 19818 57.1% 19.93/sec -28-Feb 16:29:06 test234 676.9 sec 454: 8053 of 19818 59.4% 0.67/sec -28-Feb 16:29:33 test233 26.5 sec 3: 8050 of 19818 59.4% 0.11/sec -28-Feb 16:29:41 test232 8.0 sec 29: 8021 of 19818 59.5% 3.62/sec -28-Feb 16:55:18 test231 1537.1 sec 701: 7320 of 19818 63.1% 0.46/sec -28-Feb 16:56:37 test230 79.4 sec 118: 7202 of 19818 63.7% 1.49/sec -28-Feb 16:56:38 test229 0.7 sec 10: 7192 of 19818 63.7% 14.99/sec -28-Feb 16:56:43 test228 4.9 sec 46: 7146 of 19818 63.9% 9.32/sec -28-Feb 16:56:49 test227 5.9 sec 38: 7108 of 19818 64.1% 6.39/sec -28-Feb 16:56:49 test226 0.0 sec 6: 7102 of 19818 64.2% 243.27/sec -28-Feb 16:56:49 test225 0.2 sec 4: 7098 of 19818 64.2% 21.05/sec -28-Feb 16:56:53 test224 4.3 sec 73: 7025 of 19818 64.6% 16.89/sec -28-Feb 16:56:53 test223 0.0 sec 2: 7023 of 19818 64.6% 43.35/sec -28-Feb 16:56:53 test222 0.1 
sec 11: 7012 of 19818 64.6% 121.44/sec -28-Feb 16:56:53 test221 0.0 sec 2: 7010 of 19818 64.6% 149.12/sec -28-Feb 16:56:54 test220 0.0 sec 5: 7005 of 19818 64.7% 139.26/sec -28-Feb 16:56:54 test219 0.0 sec 4: 7001 of 19818 64.7% 299.76/sec -28-Feb 16:56:54 test217 0.0 sec 4: 6997 of 19818 64.7% 211.20/sec -28-Feb 16:56:54 test216 0.1 sec 11: 6986 of 19818 64.7% 116.60/sec -28-Feb 16:56:56 test215 2.1 sec 1: 6985 of 19818 64.8% 0.48/sec -28-Feb 16:56:56 test214 0.0 sec 1: 6984 of 19818 64.8% 103.23/sec -28-Feb 16:56:56 test213 0.0 sec 5: 6979 of 19818 64.8% 500.90/sec -28-Feb 16:56:56 test212 0.1 sec 4: 6975 of 19818 64.8% 55.23/sec -28-Feb 16:56:56 test211 0.0 sec 12: 6963 of 19818 64.9% 624.84/sec -28-Feb 16:56:56 test210 0.0 sec 2: 6961 of 19818 64.9% 283.05/sec -28-Feb 16:57:01 test209 5.0 sec 24: 6937 of 19818 65.0% 4.79/sec -28-Feb 16:57:01 test208 0.0 sec 5: 6932 of 19818 65.0% 217.42/sec -28-Feb 16:57:01 test207 0.1 sec 9: 6923 of 19818 65.1% 114.80/sec -28-Feb 16:57:04 test206 2.6 sec 12: 6911 of 19818 65.1% 4.63/sec -28-Feb 16:57:04 test204 0.2 sec 10: 6901 of 19818 65.2% 64.01/sec -28-Feb 16:57:04 test203 0.0 sec 7: 6894 of 19818 65.2% 667.68/sec -28-Feb 16:57:04 test202 0.0 sec 8: 6886 of 19818 65.3% 537.74/sec -28-Feb 16:57:04 test201 0.0 sec 7: 6879 of 19818 65.3% 513.91/sec -28-Feb 16:57:06 test200 2.4 sec 7: 6872 of 19818 65.3% 2.92/sec -28-Feb 16:57:06 test199 0.0 sec 1: 6871 of 19818 65.3% 61.06/sec -28-Feb 16:57:06 test198 0.1 sec 4: 6867 of 19818 65.3% 58.36/sec -28-Feb 16:57:07 test197 0.7 sec 1: 6866 of 19818 65.4% 1.49/sec -28-Feb 16:57:09 test196 2.3 sec 15: 6851 of 19818 65.4% 6.64/sec -28-Feb 16:59:05 test195 116.1 sec 79: 6772 of 19818 65.8% 0.68/sec -28-Feb 17:00:12 test194 66.6 sec 124: 6648 of 19818 66.5% 1.86/sec -28-Feb 17:00:31 test193 18.6 sec 6: 6642 of 19818 66.5% 0.32/sec -28-Feb 17:00:40 test189 9.9 sec 10: 6632 of 19818 66.5% 1.01/sec -28-Feb 17:00:41 test183 0.0 sec 4: 6628 of 19818 66.6% 99.64/sec -28-Feb 17:00:41 test182 0.7 
sec 9: 6619 of 19818 66.6% 13.65/sec -28-Feb 17:00:41 test179 0.1 sec 18: 6601 of 19818 66.7% 332.06/sec -28-Feb 17:00:41 test165 0.0 sec 3: 6598 of 19818 66.7% 340.41/sec -28-Feb 17:00:42 test01 0.8 sec 759: 5839 of 19818 70.5% 898.56/sec -28-Feb 17:00:42 test07b 0.0 sec 2: 5837 of 19818 70.5% 139.52/sec -28-Feb 17:00:42 test83 0.0 sec 1: 5836 of 19818 70.6% 109.81/sec -28-Feb 17:00:42 test176 0.2 sec 7: 5829 of 19818 70.6% 41.02/sec -28-Feb 17:00:42 test174 0.0 sec 9: 5820 of 19818 70.6% 211.22/sec -28-Feb 17:00:42 test170 0.1 sec 1: 5819 of 19818 70.6% 12.88/sec -28-Feb 17:00:43 test152 0.7 sec 405: 5414 of 19818 72.7% 571.62/sec -28-Feb 17:00:43 test155 0.1 sec 13: 5401 of 19818 72.7% 92.14/sec -28-Feb 17:00:44 test156 0.7 sec 2: 5399 of 19818 72.8% 3.06/sec -28-Feb 17:00:44 test136 0.1 sec 21: 5378 of 19818 72.9% 346.84/sec -28-Feb 17:00:44 test02 0.2 sec 133: 5245 of 19818 73.5% 632.84/sec -28-Feb 17:00:44 test109 0.1 sec 2: 5243 of 19818 73.5% 20.84/sec -28-Feb 17:00:44 test109 0.0 sec 1: 5242 of 19818 73.5% 449.24/sec -28-Feb 17:00:44 test04 0.0 sec 8: 5234 of 19818 73.6% 249.44/sec -28-Feb 17:00:52 test142 7.6 sec 627: 4607 of 19818 76.8% 82.35/sec -28-Feb 17:00:52 test162 0.1 sec 1: 4606 of 19818 76.8% 16.96/sec -28-Feb 17:00:52 test161 0.1 sec 1: 4605 of 19818 76.8% 7.96/sec -28-Feb 17:00:54 test159 1.4 sec 23: 4582 of 19818 76.9% 16.62/sec -28-Feb 17:00:54 test137 0.1 sec 10: 4572 of 19818 76.9% 69.20/sec -28-Feb 17:00:54 test139 0.5 sec 2: 4570 of 19818 76.9% 3.81/sec -28-Feb 17:00:54 test09 0.0 sec 1: 4569 of 19818 76.9% 54.82/sec -28-Feb 17:00:54 test132 0.0 sec 1: 4568 of 19818 77.0% 34.62/sec -28-Feb 17:00:59 test141 4.7 sec 110: 4458 of 19818 77.5% 23.27/sec -28-Feb 17:01:00 test144 0.6 sec 1: 4457 of 19818 77.5% 1.69/sec -28-Feb 17:01:00 test145 0.2 sec 5: 4452 of 19818 77.5% 20.84/sec -28-Feb 17:01:00 test92 0.1 sec 4: 4448 of 19818 77.6% 34.87/sec -28-Feb 17:01:00 test108 0.3 sec 2: 4446 of 19818 77.6% 5.97/sec -28-Feb 17:01:01 test172 0.1 sec 
3: 4443 of 19818 77.6% 23.20/sec -28-Feb 17:01:01 test148 0.6 sec 7: 4436 of 19818 77.6% 11.06/sec -28-Feb 17:01:02 testc2(1) 0.7 sec 6: 4430 of 19818 77.6% 8.54/sec -28-Feb 17:01:04 test173 2.2 sec 11: 4419 of 19818 77.7% 4.96/sec -28-Feb 17:01:05 test157 0.7 sec 13: 4406 of 19818 77.8% 17.50/sec -28-Feb 17:01:10 test29 5.4 sec 3: 4403 of 19818 77.8% 0.55/sec -28-Feb 17:01:11 test128 0.2 sec 15: 4388 of 19818 77.9% 62.00/sec -28-Feb 17:01:36 test125 24.9 sec 639: 3749 of 19818 81.1% 25.65/sec -28-Feb 17:01:36 test82 0.1 sec 5: 3744 of 19818 81.1% 72.89/sec -28-Feb 17:01:50 test158 14.7 sec 19: 3725 of 19818 81.2% 1.29/sec -28-Feb 17:01:52 test84 2.1 sec 19: 3706 of 19818 81.3% 9.00/sec -28-Feb 17:01:54 test130 1.6 sec 18: 3688 of 19818 81.4% 11.34/sec -28-Feb 17:02:06 test19b 11.5 sec 44: 3644 of 19818 81.6% 3.81/sec -28-Feb 17:02:21 test19b 15.9 sec 5: 3639 of 19818 81.6% 0.32/sec -28-Feb 17:02:23 test133 1.4 sec 2: 3637 of 19818 81.6% 1.42/sec -28-Feb 17:02:29 test80 6.4 sec 2: 3635 of 19818 81.7% 0.31/sec -28-Feb 17:03:24 test151 54.9 sec 74: 3561 of 19818 82.0% 1.35/sec -28-Feb 17:03:25 test124 1.0 sec 3: 3558 of 19818 82.0% 2.99/sec -28-Feb 17:04:02 test23 36.6 sec 88: 3470 of 19818 82.5% 2.41/sec -28-Feb 17:04:22 test175 20.5 sec 1: 3469 of 19818 82.5% 0.05/sec -28-Feb 17:06:17 test160 115.0 sec 16: 3453 of 19818 82.6% 0.14/sec -28-Feb 17:07:19 test160 62.0 sec 3: 3450 of 19818 82.6% 0.05/sec -28-Feb 17:07:30 test54 10.8 sec 20: 3430 of 19818 82.7% 1.85/sec -28-Feb 17:07:46 test104 15.6 sec 38: 3392 of 19818 82.9% 2.44/sec -28-Feb 17:07:49 test11 3.4 sec 3: 3389 of 19818 82.9% 0.89/sec -28-Feb 17:07:50 test129 1.3 sec 1: 3388 of 19818 82.9% 0.76/sec -28-Feb 17:07:50 test138 0.1 sec 1: 3387 of 19818 82.9% 11.70/sec -28-Feb 17:13:26 test127 335.6 sec 1613: 1774 of 19818 91.0% 4.81/sec -28-Feb 17:13:39 test76 12.7 sec 15: 1759 of 19818 91.1% 1.18/sec -28-Feb 17:13:40 test107 1.5 sec 3: 1756 of 19818 91.1% 1.95/sec -28-Feb 17:13:47 test69 7.2 sec 2: 1754 of 
19818 91.1% 0.28/sec -28-Feb 17:13:50 test135 2.5 sec 4: 1750 of 19818 91.2% 1.61/sec -28-Feb 17:14:23 test17 33.3 sec 29: 1721 of 19818 91.3% 0.87/sec -28-Feb 17:14:53 test53 29.2 sec 4: 1717 of 19818 91.3% 0.14/sec -28-Feb 17:17:51 test19 178.3 sec 12: 1705 of 19818 91.4% 0.07/sec -[malloc debugging turned off] -28-Feb 17:31:26 test10 814.9 sec 784: 921 of 19818 95.4% 0.96/sec -28-Feb 17:37:33 test75b 367.3 sec 870: 51 of 19818 99.7% 2.37/sec -28-Feb 17:41:09 test16 215.4 sec 8: 43 of 19818 99.8% 0.04/sec -28-Feb 17:43:00 test81 111.5 sec 6: 37 of 19818 99.8% 0.05/sec -28-Feb 17:44:30 test21b 90.0 sec 21: 16 of 19818 99.9% 0.23/sec -28-Feb 17:50:30 test18 359.8 sec 16: all 19818 full 100% 0.04/sec -[malloc debugging turned back on] diff --git a/GraphBLAS/Tcov/log_Mar14_2022.txt b/GraphBLAS/Tcov/log_Mar14_2022.txt deleted file mode 100644 index 6d723af28d..0000000000 --- a/GraphBLAS/Tcov/log_Mar14_2022.txt +++ /dev/null @@ -1,145 +0,0 @@ - ----------------------------------------------- [malloc] [cover] -14-Mar 16:15:26 test243 15.9 sec 206: 19612 of 19818 1.0% 12.92/sec -14-Mar 16:15:56 test242 29.3 sec 309: 19303 of 19818 2.6% 10.54/sec -14-Mar 16:15:56 test241 0.3 sec 144: 19159 of 19818 3.3% 421.22/sec -14-Mar 16:17:10 testca 73.6 sec 497: 18662 of 19818 5.8% 6.75/sec -14-Mar 16:17:10 test240 0.4 sec 19: 18643 of 19818 5.9% 45.82/sec -14-Mar 16:17:10 test240 0.2 sec 5: 18638 of 19818 6.0% 27.53/sec -14-Mar 16:18:39 testca 88.5 sec 12: 18626 of 19818 6.0% 0.14/sec -14-Mar 16:18:50 test238 11.0 sec 161: 18465 of 19818 6.8% 14.58/sec -14-Mar 16:18:51 test237 0.9 sec 3: 18462 of 19818 6.8% 3.19/sec -14-Mar 16:18:54 test236 3.3 sec 109: 18353 of 19818 7.4% 32.86/sec -14-Mar 16:18:58 test192 4.1 sec 46: 18307 of 19818 7.6% 11.16/sec -14-Mar 16:19:08 test191 9.8 sec 63: 18244 of 19818 7.9% 6.45/sec -14-Mar 16:19:51 test188 42.7 sec 348: 17896 of 19818 9.7% 8.14/sec -14-Mar 16:19:58 test187 7.0 sec 20: 17876 of 19818 9.8% 2.86/sec -14-Mar 16:19:58 test186 0.4 sec 50: 
17826 of 19818 10.1% 142.60/sec -14-Mar 16:19:58 test186 0.3 sec 10: 17816 of 19818 10.1% 36.32/sec -14-Mar 16:21:36 test185 97.4 sec 40: 17776 of 19818 10.3% 0.41/sec -14-Mar 16:21:39 test184 3.4 sec 51: 17725 of 19818 10.6% 14.91/sec -14-Mar 16:21:57 test181 17.8 sec 102: 17623 of 19818 11.1% 5.73/sec -14-Mar 16:22:04 test180 7.4 sec 213: 17410 of 19818 12.2% 28.82/sec -14-Mar 16:22:26 test180 22.1 sec 11: 17399 of 19818 12.2% 0.50/sec -14-Mar 16:22:27 test150 0.2 sec 41: 17358 of 19818 12.4% 167.74/sec -14-Mar 16:22:43 test14 15.8 sec 710: 16648 of 19818 16.0% 44.89/sec -14-Mar 16:24:24 test154 101.6 sec 1906: 14742 of 19818 25.6% 18.76/sec -14-Mar 16:24:54 test151b 29.4 sec 235: 14507 of 19818 26.8% 8.00/sec -14-Mar 16:24:54 test239 0.0 sec 14: 14493 of 19818 26.9% 508.00/sec -14-Mar 16:34:33 test74 579.7 sec 5982: 8511 of 19818 57.1% 10.32/sec -14-Mar 16:34:33 test235 0.0 sec 4: 8507 of 19818 57.1% 107.22/sec -14-Mar 16:35:55 test234 81.7 sec 454: 8053 of 19818 59.4% 5.56/sec -14-Mar 16:36:03 test233 7.6 sec 3: 8050 of 19818 59.4% 0.39/sec -14-Mar 16:36:04 test232 1.1 sec 29: 8021 of 19818 59.5% 26.63/sec -14-Mar 16:42:15 test231 371.7 sec 701: 7320 of 19818 63.1% 1.89/sec -14-Mar 16:43:30 test230 74.6 sec 118: 7202 of 19818 63.7% 1.58/sec -14-Mar 16:43:31 test229 0.8 sec 10: 7192 of 19818 63.7% 12.15/sec -14-Mar 16:43:36 test228 5.3 sec 46: 7146 of 19818 63.9% 8.71/sec -14-Mar 16:43:42 test227 5.4 sec 38: 7108 of 19818 64.1% 7.10/sec -14-Mar 16:43:42 test226 0.0 sec 6: 7102 of 19818 64.2% 454.42/sec -14-Mar 16:43:42 test225 0.2 sec 4: 7098 of 19818 64.2% 22.37/sec -14-Mar 16:43:47 test224 5.3 sec 73: 7025 of 19818 64.6% 13.77/sec -14-Mar 16:43:47 test223 0.0 sec 2: 7023 of 19818 64.6% 87.58/sec -14-Mar 16:43:47 test222 0.1 sec 11: 7012 of 19818 64.6% 80.55/sec -14-Mar 16:43:47 test221 0.0 sec 2: 7010 of 19818 64.6% 238.02/sec -14-Mar 16:43:47 test220 0.0 sec 5: 7005 of 19818 64.7% 167.92/sec -14-Mar 16:43:47 test219 0.0 sec 4: 7001 of 19818 64.7% 486.32/sec 
-14-Mar 16:43:47 test217 0.0 sec 4: 6997 of 19818 64.7% 206.64/sec -14-Mar 16:43:48 test216 0.1 sec 11: 6986 of 19818 64.7% 79.25/sec -14-Mar 16:43:50 test215 2.6 sec 1: 6985 of 19818 64.8% 0.38/sec -14-Mar 16:43:50 test214 0.0 sec 1: 6984 of 19818 64.8% 93.66/sec -14-Mar 16:43:50 test213 0.0 sec 5: 6979 of 19818 64.8% 528.74/sec -14-Mar 16:43:50 test212 0.1 sec 4: 6975 of 19818 64.8% 26.94/sec -14-Mar 16:43:50 test211 0.0 sec 12: 6963 of 19818 64.9% 610.16/sec -14-Mar 16:43:50 test210 0.0 sec 2: 6961 of 19818 64.9% 480.40/sec -14-Mar 16:43:55 test209 4.2 sec 24: 6937 of 19818 65.0% 5.78/sec -14-Mar 16:43:55 test208 0.0 sec 5: 6932 of 19818 65.0% 283.46/sec -14-Mar 16:43:55 test207 0.1 sec 9: 6923 of 19818 65.1% 66.06/sec -14-Mar 16:43:57 test206 2.4 sec 12: 6911 of 19818 65.1% 5.05/sec -14-Mar 16:43:57 test204 0.2 sec 10: 6901 of 19818 65.2% 40.12/sec -14-Mar 16:43:57 test203 0.0 sec 7: 6894 of 19818 65.2% 1302.37/sec -14-Mar 16:43:57 test202 0.0 sec 8: 6886 of 19818 65.3% 874.01/sec -14-Mar 16:43:57 test201 0.0 sec 7: 6879 of 19818 65.3% 986.75/sec -14-Mar 16:44:00 test200 2.1 sec 7: 6872 of 19818 65.3% 3.29/sec -14-Mar 16:44:00 test199 0.0 sec 1: 6871 of 19818 65.3% 198.28/sec -14-Mar 16:44:00 test198 0.1 sec 4: 6867 of 19818 65.3% 30.57/sec -14-Mar 16:44:00 test197 0.4 sec 1: 6866 of 19818 65.4% 2.25/sec -14-Mar 16:44:02 test196 1.6 sec 15: 6851 of 19818 65.4% 9.24/sec -14-Mar 16:46:03 test195 120.9 sec 79: 6772 of 19818 65.8% 0.65/sec -14-Mar 16:47:06 test194 63.5 sec 124: 6648 of 19818 66.5% 1.95/sec -14-Mar 16:47:23 test193 17.3 sec 6: 6642 of 19818 66.5% 0.35/sec -14-Mar 16:47:30 test189 7.0 sec 10: 6632 of 19818 66.5% 1.43/sec -14-Mar 16:47:31 test183 0.0 sec 4: 6628 of 19818 66.6% 181.40/sec -14-Mar 16:47:32 test182 1.2 sec 9: 6619 of 19818 66.6% 7.44/sec -14-Mar 16:47:32 test179 0.1 sec 18: 6601 of 19818 66.7% 292.62/sec -14-Mar 16:47:32 test165 0.0 sec 3: 6598 of 19818 66.7% 459.59/sec -14-Mar 16:47:33 test01 1.4 sec 759: 5839 of 19818 70.5% 552.66/sec 
-14-Mar 16:47:33 test07b 0.0 sec 2: 5837 of 19818 70.5% 188.57/sec -14-Mar 16:47:33 test83 0.0 sec 1: 5836 of 19818 70.6% 203.49/sec -14-Mar 16:47:33 test176 0.2 sec 7: 5829 of 19818 70.6% 38.73/sec -14-Mar 16:47:33 test174 0.0 sec 9: 5820 of 19818 70.6% 271.35/sec -14-Mar 16:47:34 test170 0.1 sec 1: 5819 of 19818 70.6% 7.67/sec -14-Mar 16:47:35 test152 1.0 sec 405: 5414 of 19818 72.7% 394.35/sec -14-Mar 16:47:35 test155 0.2 sec 13: 5401 of 19818 72.7% 60.64/sec -14-Mar 16:47:36 test156 0.7 sec 2: 5399 of 19818 72.8% 2.72/sec -14-Mar 16:47:36 test136 0.0 sec 21: 5378 of 19818 72.9% 495.89/sec -14-Mar 16:47:36 test02 0.3 sec 133: 5245 of 19818 73.5% 456.18/sec -14-Mar 16:47:36 test109 0.1 sec 2: 5243 of 19818 73.5% 14.16/sec -14-Mar 16:47:36 test109 0.0 sec 1: 5242 of 19818 73.5% 456.79/sec -14-Mar 16:47:36 test04 0.0 sec 8: 5234 of 19818 73.6% 289.51/sec -14-Mar 16:47:44 test142 7.7 sec 627: 4607 of 19818 76.8% 81.33/sec -14-Mar 16:47:44 test162 0.1 sec 1: 4606 of 19818 76.8% 13.21/sec -14-Mar 16:47:44 test161 0.2 sec 1: 4605 of 19818 76.8% 6.03/sec -14-Mar 16:47:45 test159 1.4 sec 23: 4582 of 19818 76.9% 16.47/sec -14-Mar 16:47:46 test137 0.3 sec 10: 4572 of 19818 76.9% 34.66/sec -14-Mar 16:47:46 test139 0.5 sec 2: 4570 of 19818 76.9% 4.36/sec -14-Mar 16:47:46 test09 0.0 sec 1: 4569 of 19818 76.9% 53.88/sec -14-Mar 16:47:46 test132 0.0 sec 1: 4568 of 19818 77.0% 55.35/sec -14-Mar 16:47:50 test141 3.9 sec 110: 4458 of 19818 77.5% 28.07/sec -14-Mar 16:47:51 test144 0.7 sec 1: 4457 of 19818 77.5% 1.54/sec -14-Mar 16:47:51 test145 0.3 sec 5: 4452 of 19818 77.5% 17.57/sec -14-Mar 16:47:51 test92 0.1 sec 4: 4448 of 19818 77.6% 31.02/sec -14-Mar 16:47:52 test108 0.4 sec 2: 4446 of 19818 77.6% 5.47/sec -14-Mar 16:47:52 test172 0.2 sec 3: 4443 of 19818 77.6% 17.97/sec -14-Mar 16:47:53 test148 0.8 sec 7: 4436 of 19818 77.6% 8.57/sec -14-Mar 16:47:53 testc2(1) 0.6 sec 6: 4430 of 19818 77.6% 10.89/sec -14-Mar 16:47:55 test173 1.8 sec 11: 4419 of 19818 77.7% 6.14/sec -14-Mar 
16:47:56 test157 0.8 sec 13: 4406 of 19818 77.8% 16.15/sec -14-Mar 16:48:10 test29 14.2 sec 3: 4403 of 19818 77.8% 0.21/sec -14-Mar 16:48:10 test128 0.3 sec 15: 4388 of 19818 77.9% 43.67/sec -14-Mar 16:48:45 test125 34.7 sec 639: 3749 of 19818 81.1% 18.42/sec -14-Mar 16:48:45 test82 0.1 sec 5: 3744 of 19818 81.1% 57.91/sec -14-Mar 16:49:01 test158 15.3 sec 19: 3725 of 19818 81.2% 1.24/sec -14-Mar 16:49:02 test84 1.9 sec 19: 3706 of 19818 81.3% 9.75/sec -14-Mar 16:49:04 test130 1.4 sec 18: 3688 of 19818 81.4% 12.87/sec -14-Mar 16:49:11 test19b 6.7 sec 44: 3644 of 19818 81.6% 6.57/sec -14-Mar 16:49:14 test19b 3.5 sec 5: 3639 of 19818 81.6% 1.42/sec -14-Mar 16:49:15 test133 0.5 sec 2: 3637 of 19818 81.6% 3.72/sec -14-Mar 16:49:18 test80 3.5 sec 2: 3635 of 19818 81.7% 0.56/sec -14-Mar 16:49:38 test151 19.7 sec 74: 3561 of 19818 82.0% 3.75/sec -14-Mar 16:49:38 test124 0.2 sec 3: 3558 of 19818 82.0% 13.92/sec -14-Mar 16:49:50 test23 12.3 sec 88: 3470 of 19818 82.5% 7.15/sec -14-Mar 16:50:01 test175 10.0 sec 1: 3469 of 19818 82.5% 0.10/sec -14-Mar 16:51:05 test160 64.2 sec 16: 3453 of 19818 82.6% 0.25/sec -14-Mar 16:52:16 test160 71.2 sec 3: 3450 of 19818 82.6% 0.04/sec -14-Mar 16:52:30 test54 13.9 sec 20: 3430 of 19818 82.7% 1.44/sec -14-Mar 16:52:50 test104 20.3 sec 38: 3392 of 19818 82.9% 1.87/sec -14-Mar 16:52:55 test11 4.6 sec 3: 3389 of 19818 82.9% 0.65/sec -14-Mar 16:52:56 test129 1.3 sec 1: 3388 of 19818 82.9% 0.77/sec -14-Mar 16:52:56 test138 0.1 sec 1: 3387 of 19818 82.9% 7.22/sec -14-Mar 16:57:47 test127 290.7 sec 1613: 1774 of 19818 91.0% 5.55/sec -14-Mar 16:58:00 test76 12.9 sec 15: 1759 of 19818 91.1% 1.16/sec -14-Mar 16:58:01 test107 1.4 sec 3: 1756 of 19818 91.1% 2.09/sec -14-Mar 16:58:06 test69 5.0 sec 2: 1754 of 19818 91.1% 0.40/sec -14-Mar 16:58:08 test135 1.3 sec 4: 1750 of 19818 91.2% 3.12/sec -14-Mar 16:58:39 test17 31.4 sec 29: 1721 of 19818 91.3% 0.92/sec -14-Mar 16:59:03 test53 23.7 sec 4: 1717 of 19818 91.3% 0.17/sec -14-Mar 17:01:29 test19 146.0 
sec 12: 1705 of 19818 91.4% 0.08/sec -[malloc debugging turned off] -14-Mar 17:10:51 test10 561.9 sec 784: 921 of 19818 95.4% 1.40/sec -14-Mar 17:15:13 test75b 262.0 sec 870: 51 of 19818 99.7% 3.32/sec -14-Mar 17:18:05 test16 172.0 sec 8: 43 of 19818 99.8% 0.05/sec -14-Mar 17:19:34 test81 89.3 sec 6: 37 of 19818 99.8% 0.07/sec -14-Mar 17:20:34 test21b 60.1 sec 21: 16 of 19818 99.9% 0.35/sec -14-Mar 17:25:02 test18 268.2 sec 16: all 19818 full 100% 0.06/sec -[malloc debugging turned back on] diff --git a/GraphBLAS/Tcov/log_Mar8_2022.txt b/GraphBLAS/Tcov/log_Mar8_2022.txt deleted file mode 100644 index a58076c364..0000000000 --- a/GraphBLAS/Tcov/log_Mar8_2022.txt +++ /dev/null @@ -1,145 +0,0 @@ - ----------------------------------------------- [malloc] [cover] -08-Mar 19:36:03 test243 20.5 sec 206: 19612 of 19818 1.0% 10.02/sec -08-Mar 19:36:43 test242 39.6 sec 309: 19303 of 19818 2.6% 7.80/sec -08-Mar 19:36:43 test241 0.2 sec 144: 19159 of 19818 3.3% 579.86/sec -08-Mar 19:38:14 testca 90.4 sec 497: 18662 of 19818 5.8% 5.50/sec -08-Mar 19:38:14 test240 0.3 sec 19: 18643 of 19818 5.9% 61.72/sec -08-Mar 19:38:14 test240 0.2 sec 5: 18638 of 19818 6.0% 25.09/sec -08-Mar 19:40:00 testca 106.1 sec 12: 18626 of 19818 6.0% 0.11/sec -08-Mar 19:40:16 test238 15.7 sec 161: 18465 of 19818 6.8% 10.24/sec -08-Mar 19:40:17 test237 1.1 sec 3: 18462 of 19818 6.8% 2.62/sec -08-Mar 19:40:21 test236 3.8 sec 109: 18353 of 19818 7.4% 28.94/sec -08-Mar 19:40:27 test192 5.9 sec 46: 18307 of 19818 7.6% 7.85/sec -08-Mar 19:40:40 test191 12.6 sec 63: 18244 of 19818 7.9% 4.99/sec -08-Mar 19:41:32 test188 52.6 sec 348: 17896 of 19818 9.7% 6.61/sec -08-Mar 19:41:41 test187 9.0 sec 20: 17876 of 19818 9.8% 2.22/sec -08-Mar 19:41:42 test186 0.5 sec 50: 17826 of 19818 10.1% 108.16/sec -08-Mar 19:41:42 test186 0.4 sec 10: 17816 of 19818 10.1% 24.71/sec -08-Mar 19:43:31 test185 109.2 sec 40: 17776 of 19818 10.3% 0.37/sec -08-Mar 19:43:35 test184 3.3 sec 51: 17725 of 19818 10.6% 15.33/sec -08-Mar 
19:43:56 test181 21.5 sec 102: 17623 of 19818 11.1% 4.75/sec -08-Mar 19:44:05 test180 8.4 sec 213: 17410 of 19818 12.2% 25.25/sec -08-Mar 19:44:21 test180 16.7 sec 11: 17399 of 19818 12.2% 0.66/sec -08-Mar 19:44:21 test150 0.1 sec 41: 17358 of 19818 12.4% 373.74/sec -08-Mar 19:44:33 test14 11.8 sec 710: 16648 of 19818 16.0% 60.30/sec -08-Mar 19:46:22 test154 108.5 sec 1906: 14742 of 19818 25.6% 17.56/sec -08-Mar 19:46:51 test151b 29.7 sec 235: 14507 of 19818 26.8% 7.91/sec -08-Mar 19:46:51 test239 0.0 sec 14: 14493 of 19818 26.9% 501.41/sec -08-Mar 19:55:23 test74 511.6 sec 5982: 8511 of 19818 57.1% 11.69/sec -08-Mar 19:55:23 test235 0.0 sec 4: 8507 of 19818 57.1% 104.78/sec -08-Mar 19:56:49 test234 85.6 sec 454: 8053 of 19818 59.4% 5.30/sec -08-Mar 19:56:57 test233 7.8 sec 3: 8050 of 19818 59.4% 0.39/sec -08-Mar 19:56:57 test232 0.8 sec 29: 8021 of 19818 59.5% 35.97/sec -08-Mar 20:04:01 test231 423.9 sec 701: 7320 of 19818 63.1% 1.65/sec -08-Mar 20:05:27 test230 85.7 sec 118: 7202 of 19818 63.7% 1.38/sec -08-Mar 20:05:28 test229 0.7 sec 10: 7192 of 19818 63.7% 14.63/sec -08-Mar 20:05:33 test228 5.7 sec 46: 7146 of 19818 63.9% 8.12/sec -08-Mar 20:05:39 test227 5.6 sec 38: 7108 of 19818 64.1% 6.79/sec -08-Mar 20:05:39 test226 0.0 sec 6: 7102 of 19818 64.2% 489.84/sec -08-Mar 20:05:39 test225 0.1 sec 4: 7098 of 19818 64.2% 27.40/sec -08-Mar 20:05:44 test224 4.7 sec 73: 7025 of 19818 64.6% 15.52/sec -08-Mar 20:05:44 test223 0.0 sec 2: 7023 of 19818 64.6% 67.85/sec -08-Mar 20:05:44 test222 0.1 sec 11: 7012 of 19818 64.6% 154.28/sec -08-Mar 20:05:44 test221 0.0 sec 2: 7010 of 19818 64.6% 254.74/sec -08-Mar 20:05:44 test220 0.0 sec 5: 7005 of 19818 64.7% 154.41/sec -08-Mar 20:05:44 test219 0.0 sec 4: 7001 of 19818 64.7% 580.30/sec -08-Mar 20:05:44 test217 0.0 sec 4: 6997 of 19818 64.7% 310.90/sec -08-Mar 20:05:44 test216 0.1 sec 11: 6986 of 19818 64.7% 142.88/sec -08-Mar 20:05:47 test215 2.6 sec 1: 6985 of 19818 64.8% 0.38/sec -08-Mar 20:05:47 test214 0.0 sec 1: 6984 of 
19818 64.8% 141.72/sec -08-Mar 20:05:47 test213 0.0 sec 5: 6979 of 19818 64.8% 820.08/sec -08-Mar 20:05:47 test212 0.1 sec 4: 6975 of 19818 64.8% 50.41/sec -08-Mar 20:05:47 test211 0.0 sec 12: 6963 of 19818 64.9% 624.80/sec -08-Mar 20:05:47 test210 0.0 sec 2: 6961 of 19818 64.9% 727.01/sec -08-Mar 20:05:52 test209 5.3 sec 24: 6937 of 19818 65.0% 4.53/sec -08-Mar 20:05:52 test208 0.0 sec 5: 6932 of 19818 65.0% 280.84/sec -08-Mar 20:05:52 test207 0.1 sec 9: 6923 of 19818 65.1% 128.91/sec -08-Mar 20:05:55 test206 2.6 sec 12: 6911 of 19818 65.1% 4.54/sec -08-Mar 20:05:55 test204 0.1 sec 10: 6901 of 19818 65.2% 76.89/sec -08-Mar 20:05:55 test203 0.0 sec 7: 6894 of 19818 65.2% 1647.45/sec -08-Mar 20:05:55 test202 0.0 sec 8: 6886 of 19818 65.3% 932.62/sec -08-Mar 20:05:55 test201 0.0 sec 7: 6879 of 19818 65.3% 994.88/sec -08-Mar 20:05:58 test200 3.1 sec 7: 6872 of 19818 65.3% 2.24/sec -08-Mar 20:05:58 test199 0.0 sec 1: 6871 of 19818 65.3% 212.45/sec -08-Mar 20:05:58 test198 0.1 sec 4: 6867 of 19818 65.3% 59.79/sec -08-Mar 20:05:59 test197 0.7 sec 1: 6866 of 19818 65.4% 1.38/sec -08-Mar 20:06:01 test196 2.5 sec 15: 6851 of 19818 65.4% 6.07/sec -08-Mar 20:08:20 test195 138.5 sec 79: 6772 of 19818 65.8% 0.57/sec -08-Mar 20:09:25 test194 65.3 sec 124: 6648 of 19818 66.5% 1.90/sec -08-Mar 20:09:43 test193 18.0 sec 6: 6642 of 19818 66.5% 0.33/sec -08-Mar 20:09:51 test189 7.9 sec 10: 6632 of 19818 66.5% 1.26/sec -08-Mar 20:09:51 test183 0.0 sec 4: 6628 of 19818 66.6% 159.69/sec -08-Mar 20:09:52 test182 0.9 sec 9: 6619 of 19818 66.6% 10.29/sec -08-Mar 20:09:52 test179 0.1 sec 18: 6601 of 19818 66.7% 320.83/sec -08-Mar 20:09:52 test165 0.0 sec 3: 6598 of 19818 66.7% 606.67/sec -08-Mar 20:09:53 test01 0.9 sec 759: 5839 of 19818 70.5% 884.45/sec -08-Mar 20:09:53 test07b 0.0 sec 2: 5837 of 19818 70.5% 308.31/sec -08-Mar 20:09:53 test83 0.0 sec 1: 5836 of 19818 70.6% 316.36/sec -08-Mar 20:09:53 test176 0.2 sec 7: 5829 of 19818 70.6% 37.56/sec -08-Mar 20:09:53 test174 0.0 sec 9: 5820 
of 19818 70.6% 326.60/sec -08-Mar 20:09:53 test170 0.1 sec 1: 5819 of 19818 70.6% 14.56/sec -08-Mar 20:09:54 test152 0.8 sec 405: 5414 of 19818 72.7% 529.71/sec -08-Mar 20:09:54 test155 0.1 sec 13: 5401 of 19818 72.7% 106.73/sec -08-Mar 20:09:55 test156 0.7 sec 2: 5399 of 19818 72.8% 2.87/sec -08-Mar 20:09:55 test136 0.0 sec 21: 5378 of 19818 72.9% 482.54/sec -08-Mar 20:09:55 test02 0.2 sec 133: 5245 of 19818 73.5% 630.55/sec -08-Mar 20:09:55 test109 0.1 sec 2: 5243 of 19818 73.5% 27.21/sec -08-Mar 20:09:55 test109 0.0 sec 1: 5242 of 19818 73.5% 410.85/sec -08-Mar 20:09:55 test04 0.0 sec 8: 5234 of 19818 73.6% 312.71/sec -08-Mar 20:10:03 test142 7.7 sec 627: 4607 of 19818 76.8% 81.92/sec -08-Mar 20:10:03 test162 0.1 sec 1: 4606 of 19818 76.8% 19.03/sec -08-Mar 20:10:03 test161 0.1 sec 1: 4605 of 19818 76.8% 9.85/sec -08-Mar 20:10:05 test159 1.4 sec 23: 4582 of 19818 76.9% 16.95/sec -08-Mar 20:10:05 test137 0.1 sec 10: 4572 of 19818 76.9% 72.57/sec -08-Mar 20:10:05 test139 0.4 sec 2: 4570 of 19818 76.9% 4.78/sec -08-Mar 20:10:05 test09 0.0 sec 1: 4569 of 19818 76.9% 97.86/sec -08-Mar 20:10:05 test132 0.0 sec 1: 4568 of 19818 77.0% 52.51/sec -08-Mar 20:10:09 test141 3.8 sec 110: 4458 of 19818 77.5% 28.74/sec -08-Mar 20:10:10 test144 0.5 sec 1: 4457 of 19818 77.5% 1.83/sec -08-Mar 20:10:10 test145 0.2 sec 5: 4452 of 19818 77.5% 32.08/sec -08-Mar 20:10:10 test92 0.1 sec 4: 4448 of 19818 77.6% 58.22/sec -08-Mar 20:10:10 test108 0.3 sec 2: 4446 of 19818 77.6% 7.27/sec -08-Mar 20:10:10 test172 0.1 sec 3: 4443 of 19818 77.6% 39.07/sec -08-Mar 20:10:11 test148 0.4 sec 7: 4436 of 19818 77.6% 17.69/sec -08-Mar 20:10:11 testc2(1) 0.3 sec 6: 4430 of 19818 77.6% 17.78/sec -08-Mar 20:10:12 test173 1.5 sec 11: 4419 of 19818 77.7% 7.26/sec -08-Mar 20:10:13 test157 0.6 sec 13: 4406 of 19818 77.8% 20.66/sec -08-Mar 20:10:19 test29 6.3 sec 3: 4403 of 19818 77.8% 0.48/sec -08-Mar 20:10:20 test128 0.3 sec 15: 4388 of 19818 77.9% 57.24/sec -08-Mar 20:10:49 test125 29.7 sec 639: 3749 of 
19818 81.1% 21.50/sec -08-Mar 20:10:49 test82 0.1 sec 5: 3744 of 19818 81.1% 60.36/sec -08-Mar 20:11:06 test158 16.7 sec 19: 3725 of 19818 81.2% 1.14/sec -08-Mar 20:11:09 test84 2.3 sec 19: 3706 of 19818 81.3% 8.27/sec -08-Mar 20:11:10 test130 1.3 sec 18: 3688 of 19818 81.4% 13.62/sec -08-Mar 20:11:20 test19b 9.7 sec 44: 3644 of 19818 81.6% 4.55/sec -08-Mar 20:11:25 test19b 5.1 sec 5: 3639 of 19818 81.6% 0.97/sec -08-Mar 20:11:25 test133 0.4 sec 2: 3637 of 19818 81.6% 4.99/sec -08-Mar 20:11:29 test80 3.6 sec 2: 3635 of 19818 81.7% 0.56/sec -08-Mar 20:11:50 test151 21.1 sec 74: 3561 of 19818 82.0% 3.50/sec -08-Mar 20:11:50 test124 0.2 sec 3: 3558 of 19818 82.0% 16.32/sec -08-Mar 20:12:05 test23 15.2 sec 88: 3470 of 19818 82.5% 5.80/sec -08-Mar 20:12:15 test175 9.7 sec 1: 3469 of 19818 82.5% 0.10/sec -08-Mar 20:13:24 test160 68.8 sec 16: 3453 of 19818 82.6% 0.23/sec -08-Mar 20:14:32 test160 68.1 sec 3: 3450 of 19818 82.6% 0.04/sec -08-Mar 20:14:43 test54 11.6 sec 20: 3430 of 19818 82.7% 1.73/sec -08-Mar 20:15:01 test104 17.5 sec 38: 3392 of 19818 82.9% 2.17/sec -08-Mar 20:15:04 test11 3.4 sec 3: 3389 of 19818 82.9% 0.88/sec -08-Mar 20:15:06 test129 1.4 sec 1: 3388 of 19818 82.9% 0.73/sec -08-Mar 20:15:06 test138 0.1 sec 1: 3387 of 19818 82.9% 15.01/sec -08-Mar 20:20:13 test127 307.3 sec 1613: 1774 of 19818 91.0% 5.25/sec -08-Mar 20:20:28 test76 15.1 sec 15: 1759 of 19818 91.1% 0.99/sec -08-Mar 20:20:30 test107 1.6 sec 3: 1756 of 19818 91.1% 1.93/sec -08-Mar 20:20:36 test69 5.9 sec 2: 1754 of 19818 91.1% 0.34/sec -08-Mar 20:20:38 test135 1.9 sec 4: 1750 of 19818 91.2% 2.12/sec -08-Mar 20:21:11 test17 33.2 sec 29: 1721 of 19818 91.3% 0.87/sec -08-Mar 20:21:39 test53 28.1 sec 4: 1717 of 19818 91.3% 0.14/sec -08-Mar 20:25:00 test19 201.2 sec 12: 1705 of 19818 91.4% 0.06/sec -[malloc debugging turned off] -08-Mar 20:37:03 test10 722.9 sec 784: 921 of 19818 95.4% 1.08/sec -08-Mar 20:44:42 test75b 458.8 sec 870: 51 of 19818 99.7% 1.90/sec -08-Mar 20:47:43 test16 181.2 sec 
8: 43 of 19818 99.8% 0.04/sec -08-Mar 20:49:25 test81 102.4 sec 6: 37 of 19818 99.8% 0.06/sec -08-Mar 20:50:49 test21b 83.0 sec 21: 16 of 19818 99.9% 0.25/sec -08-Mar 20:56:50 test18 361.8 sec 16: all 19818 full 100% 0.04/sec -[malloc debugging turned back on] diff --git a/GraphBLAS/Test/GB_mex_about8.c b/GraphBLAS/Test/GB_mex_about8.c index 3173ab2e13..7411e69d9d 100644 --- a/GraphBLAS/Test/GB_mex_about8.c +++ b/GraphBLAS/Test/GB_mex_about8.c @@ -41,7 +41,7 @@ void mexFunction //-------------------------------------------------------------------------- // Test for bug fix in GB_iso_reduce_worker (the correct test is "n <= 0"). - // Bug caught by @ParticularMiner in the python grblas interface, on Mar 8, + // Bug caught by Henry Amuasi in the python grblas interface, on Mar 8, // 2022, which causes a stack overflow because of an infinite recursion, // and segfaults in v6.2.3 and earlier. The bug first occurs in v5.1.1, // released on June 29, 2021. diff --git a/GraphBLAS/Test/GB_mex_dot_iterator.c b/GraphBLAS/Test/GB_mex_dot_iterator.c index 607301c48a..2517a54199 100644 --- a/GraphBLAS/Test/GB_mex_dot_iterator.c +++ b/GraphBLAS/Test/GB_mex_dot_iterator.c @@ -84,8 +84,6 @@ void mexFunction OK (GrB_Vector_size (&ny, Y)) ; GB_Global_print_one_based_set (0) ; - // GxB_print (X, 3) ; - // GxB_print (Y, 3) ; if (n != ny) { @@ -127,6 +125,32 @@ void mexFunction OK (GrB_Vector_nvals (&xnvals, X)) ; OK (GrB_Vector_nvals (&ynvals, Y)) ; +// if (kind == 0) { GxB_print (X, 3) ; GxB_print (Y, 3) ; } + + if (X->b != NULL && X->type == GrB_FP64) + { + // mangle the X vector where entries are not present + double *Xx = (double *) X->x ; + bool *Xb = X->b ; + int64_t n = X->vlen ; + for (int64_t k = 0 ; k < n ; k++) + { + if (!Xb [k]) Xx [k] = 42 ; + } + } + + if (Y->b != NULL && Y->type == GrB_FP64) + { + // mangle the Y vector where entries are not present + double *Yx = (double *) Y->x ; + bool *Yb = Y->b ; + int64_t n = Y->vlen ; + for (int64_t k = 0 ; k < n ; k++) + { + if 
(!Yb [k]) Yx [k] = 42 ; + } + } + if (use_macros) { // use macros that are #define'd in GraphBLAS.h diff --git a/GraphBLAS/Test/GB_mex_errors.c b/GraphBLAS/Test/GB_mex_errors.c index cfbf295353..cc647cfd35 100644 --- a/GraphBLAS/Test/GB_mex_errors.c +++ b/GraphBLAS/Test/GB_mex_errors.c @@ -2828,6 +2828,7 @@ void mexFunction ERR1 (C4, GxB_Matrix_subassign (C4, C4, GrB_PLUS_FP64, C4, I3, 3, J3, 2, NULL)) ; OK (GrB_Matrix_free_(&C4)) ; + expected = GrB_INVALID_INDEX ; OK (GrB_Matrix_dup (&A4, A)) ; ERR1 (A4, GxB_Matrix_subassign_FP64_(A4, NULL, GrB_PLUS_FP64, x_double, I3, 1, J3, 1, NULL)); OK (GrB_Matrix_free_(&A4)) ; diff --git a/GraphBLAS/Test/GB_mex_mdiag.c b/GraphBLAS/Test/GB_mex_mdiag.c index dcbf403535..3fcf124e3f 100644 --- a/GraphBLAS/Test/GB_mex_mdiag.c +++ b/GraphBLAS/Test/GB_mex_mdiag.c @@ -60,9 +60,9 @@ void mexFunction int64_t GET_SCALAR (1, int64_t, k, 0) ; // get the type - GrB_Type ctype ; - GxB_Matrix_type (&ctype, V) ; - ctype = GB_mx_string_to_Type (PARGIN (2), ctype) ; + GrB_Type ctype, vtype ; + GxB_Matrix_type (&vtype, V) ; + ctype = GB_mx_string_to_Type (PARGIN (2), vtype) ; // get fmt int GET_SCALAR (3, int, fmt, GxB_BY_COL) ; @@ -72,25 +72,23 @@ void mexFunction GrB_Matrix_nrows (&n, V) ; n += GB_IABS (k) ; - #undef GET_DEEP_COPY #undef FREE_DEEP_COPY - - #define GET_DEEP_COPY \ - GrB_Matrix_new (&C, ctype, n, n) ; \ - GxB_Matrix_Option_set (C, GxB_FORMAT, fmt) ; - #define FREE_DEEP_COPY GrB_Matrix_free_(&C) ; - GET_DEEP_COPY ; - // C = diag (v,k), using either GrB_Matrix_diag or GxB_Matrix_diag. // The two methods do the same thing. This is just to test. 
- if (k % 2 == 0) + if (k % 2 == 0 && ctype == vtype) { - METHOD (GrB_Matrix_diag (C, (GrB_Vector) V, k)) ; + // GrB_Matrix_diag does not handle typecasting + METHOD (GrB_Matrix_diag (&C, (GrB_Vector) V, k)) ; } else { + #undef GET_DEEP_COPY + #define GET_DEEP_COPY \ + GrB_Matrix_new (&C, ctype, n, n) ; \ + GxB_Matrix_Option_set (C, GxB_FORMAT, fmt) ; + GET_DEEP_COPY ; METHOD (GxB_Matrix_diag (C, (GrB_Vector) V, k, NULL)) ; } diff --git a/GraphBLAS/Test/Template/GB_mx_dot_iterator_template.c b/GraphBLAS/Test/Template/GB_mx_dot_iterator_template.c index 88323cf8b4..002cf1120d 100644 --- a/GraphBLAS/Test/Template/GB_mx_dot_iterator_template.c +++ b/GraphBLAS/Test/Template/GB_mx_dot_iterator_template.c @@ -129,16 +129,19 @@ if (i < j) { // consume x(i) + // printf ("skip x, i = %ld\n", i) ; X_info = GxB_Vector_Iterator_next (X_iterator) ; } else if (i > j) { // consume y(j) + // printf ("skip y, j = %ld\n", j) ; Y_info = GxB_Vector_Iterator_next (Y_iterator) ; } else // i == j { // s += x(i) * y(i) + // printf ("multadd at i = %ld\n", i) ; MULTADD ; // consume both x(i) and y(i) X_info = GxB_Vector_Iterator_next (X_iterator) ; diff --git a/GraphBLAS/Test/test232.m b/GraphBLAS/Test/test232.m index 2aa35c5f13..d667755c73 100644 --- a/GraphBLAS/Test/test232.m +++ b/GraphBLAS/Test/test232.m @@ -20,58 +20,73 @@ accum.optype = type ; m = 10 ; - I1 = [1 3 5] ; - I0 = uint64 (I1) - 1 ; - for n = [1 9] - if (n == 1) - J1 = 1 ; + + for trial = 0:3 + + if (trial == 0 || trial == 3) + I1 = [1 3 5] ; else - J1 = [1 2 5] ; + I1 = 3 ; end - J0 = uint64 (J1) - 1 ; + I0 = uint64 (I1) - 1 ; + + for n = [1 9] + + if (n == 1) + J1 = 1 ; + elseif (trial == 1 || trial == 3) + J1 = [1 2 5] ; + else + J1 = 2 ; + end - fprintf ('.') ; + J0 = uint64 (J1) - 1 ; + fprintf ('.') ; - C = GB_spec_random (m, n, 0.8, 100, type) ; - S1.matrix = sparse (1) ; - S1.class = type ; - S0.matrix = sparse (0) ; - S0.class = type ; + C = GB_spec_random (m, n, 0.8, 100, type) ; + S1.matrix = sparse (1) ; + 
S1.class = type ; + S0.matrix = sparse (0) ; + S0.class = type ; - C1 = GB_mex_assign_scalar (C, [ ], accum, S1, I0, J0, [ ]) ; - C2 = GB_spec_assign (C, [ ], accum, S1, I1, J1, [ ], 1) ; - GB_spec_compare (C1, C2) ; + if (n > 1 && length (I0) == 1 && length (J0) == 1) + fprintf ("#") ; + end - C1 = GB_mex_assign_scalar (C, [ ], accum, S0, I0, J0, [ ]) ; - C2 = GB_spec_assign (C, [ ], accum, S0, I1, J1, [ ], 1) ; - GB_spec_compare (C1, C2) ; + C1 = GB_mex_assign_scalar (C, [ ], accum, S1, I0, J0, [ ]) ; + C2 = GB_spec_assign (C, [ ], accum, S1, I1, J1, [ ], 1) ; + GB_spec_compare (C1, C2) ; - C1 = GB_mex_subassign_scalar (C, [ ], accum, S1, I0, J0, [ ]) ; - C2 = GB_spec_subassign (C, [ ], accum, S1, I1, J1, [ ], 1) ; - GB_spec_compare (C1, C2) ; + C1 = GB_mex_assign_scalar (C, [ ], accum, S0, I0, J0, [ ]) ; + C2 = GB_spec_assign (C, [ ], accum, S0, I1, J1, [ ], 1) ; + GB_spec_compare (C1, C2) ; - C1 = GB_mex_subassign_scalar (C, [ ], accum, S0, I0, J0, [ ]) ; - C2 = GB_spec_subassign (C, [ ], accum, S0, I1, J1, [ ], 1) ; - GB_spec_compare (C1, C2) ; + C1 = GB_mex_subassign_scalar (C, [ ], accum, S1, I0, J0, [ ]) ; + C2 = GB_spec_subassign (C, [ ], accum, S1, I1, J1, [ ], 1) ; + GB_spec_compare (C1, C2) ; - C1 = GB_mex_assign_scalar (C, [ ], [ ], S1, I0, J0, [ ]) ; - C2 = GB_spec_assign (C, [ ], [ ], S1, I1, J1, [ ], 1) ; - GB_spec_compare (C1, C2) ; + C1 = GB_mex_subassign_scalar (C, [ ], accum, S0, I0, J0, [ ]) ; + C2 = GB_spec_subassign (C, [ ], accum, S0, I1, J1, [ ], 1) ; + GB_spec_compare (C1, C2) ; - C1 = GB_mex_assign_scalar (C, [ ], [ ], S0, I0, J0, [ ]) ; - C2 = GB_spec_assign (C, [ ], [ ], S0, I1, J1, [ ], 1) ; - GB_spec_compare (C1, C2) ; + C1 = GB_mex_assign_scalar (C, [ ], [ ], S1, I0, J0, [ ]) ; + C2 = GB_spec_assign (C, [ ], [ ], S1, I1, J1, [ ], 1) ; + GB_spec_compare (C1, C2) ; - C1 = GB_mex_subassign_scalar (C, [ ], [ ], S1, I0, J0, [ ]) ; - C2 = GB_spec_subassign (C, [ ], [ ], S1, I1, J1, [ ], 1) ; - GB_spec_compare (C1, C2) ; + C1 = 
GB_mex_assign_scalar (C, [ ], [ ], S0, I0, J0, [ ]) ; + C2 = GB_spec_assign (C, [ ], [ ], S0, I1, J1, [ ], 1) ; + GB_spec_compare (C1, C2) ; - C1 = GB_mex_subassign_scalar (C, [ ], [ ], S0, I0, J0, [ ]) ; - C2 = GB_spec_subassign (C, [ ], [ ], S0, I1, J1, [ ], 1) ; - GB_spec_compare (C1, C2) ; + C1 = GB_mex_subassign_scalar (C, [ ], [ ], S1, I0, J0, [ ]) ; + C2 = GB_spec_subassign (C, [ ], [ ], S1, I1, J1, [ ], 1) ; + GB_spec_compare (C1, C2) ; + C1 = GB_mex_subassign_scalar (C, [ ], [ ], S0, I0, J0, [ ]) ; + C2 = GB_spec_subassign (C, [ ], [ ], S0, I1, J1, [ ], 1) ; + GB_spec_compare (C1, C2) ; + + end end end - fprintf ('\ntest232: all tests passed\n') ; diff --git a/GraphBLAS/Test/testall.m b/GraphBLAS/Test/testall.m index 72bc312ba9..352fc0ba83 100644 --- a/GraphBLAS/Test/testall.m +++ b/GraphBLAS/Test/testall.m @@ -163,7 +163,6 @@ function testall (threads,longtests) logstat ('test165',t) ; % test C=A*B' where A is diagonal and B becomes bitmap logstat ('test01' ,t) ; % error handling -logstat ('test07b',t) ; % quick test GB_mex_assign logstat ('test83' ,t) ; % GrB_assign with C_replace and empty J logstat ('test176',t) ; % test GrB_assign, method 09, 11 @@ -227,6 +226,7 @@ function testall (threads,longtests) logstat ('test158',t) ; % test colscale and rowscale logstat ('test84' ,t) ; % GrB_assign (row and column with C in CSR/CSC format) logstat ('test130',t) ; % GrB_apply, hypersparse cases + logstat ('test19b',t) ; % GrB_assign, many pending operators logstat ('test19b',s) ; % GrB_assign, many pending operators logstat ('test133',t) ; % test mask operations (GB_masker) @@ -297,6 +297,7 @@ function testall (threads,longtests) logstat ('test06(936)',t); % % performance test GrB_mxm on all semirings logstat ('test07',t) ; % 0 % quick test GB_mex_subassign logstat ('test07',s) ; % 0 % quick test GB_mex_subassign +logstat ('test07b',t) ; % % quick test GB_mex_assign logstat ('test09b',t) ; % % duplicate I,J test of GB_mex_assign logstat ('test13',t) ; % % 
simple tests of GB_mex_transpose diff --git a/GraphBLAS/alternative/Makefile b/GraphBLAS/alternative/Makefile index 213552d1ad..f2f83a1e98 100644 --- a/GraphBLAS/alternative/Makefile +++ b/GraphBLAS/alternative/Makefile @@ -19,9 +19,9 @@ default: library # This version info must match ../CMakeLists.txt -VER1 = 6 -VER2 = 2 -VER3 = 5 +VER1 = 7 +VER2 = 0 +VER3 = 3 # pick your compiler: CC = gcc diff --git a/GraphBLAS/rmm_wrap/CMakeLists.txt b/GraphBLAS/rmm_wrap/CMakeLists.txt index e1dcb304f1..69644a2c0a 100644 --- a/GraphBLAS/rmm_wrap/CMakeLists.txt +++ b/GraphBLAS/rmm_wrap/CMakeLists.txt @@ -33,6 +33,14 @@ endif() include_directories(${CMAKE_CURRENT_BINARY_DIR}/external_includes/rmm/include) add_library(rmm_wrap rmm_wrap.cpp rmm_wrap.hpp rmm_wrap.h) + +SET_TARGET_PROPERTIES (rmm_wrap PROPERTIES + VERSION ${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB} + SOVERSION ${GraphBLAS_VERSION_MAJOR} + C_STANDARD_REQUIRED 11 + PUBLIC_HEADER "rmm_wrap.h" ) + + add_executable(rmm_wrap_test rmm_wrap_test.c rmm_wrap.cpp rmm_wrap.hpp rmm_wrap.h) set(RMM_WRAP_LIBS ${EXTRA_LIBS} ${CUDA_LIBRARIES}) @@ -44,3 +52,7 @@ target_include_directories(rmm_wrap PUBLIC "${RMM_WRAP_INCLUDES}") target_link_libraries(rmm_wrap_test PUBLIC ${RMM_WRAP_LIBS}) target_include_directories(rmm_wrap_test PUBLIC "${RMM_WRAP_INCLUDES}") + +install (TARGETS rmm_wrap + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) diff --git a/README.md b/README.md index 7bc56fef38..dee3f18143 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ SuiteSparse: A Suite of Sparse matrix packages at http://suitesparse.com ----------------------------------------------------------------------------- -Mar 14, 2022. SuiteSparse VERSION 5.11.0 +Apr 10, 2022. 
SuiteSparse VERSION 5.12.0 Now includes GraphBLAS, SLIP_LU, and a new interface to the SuiteSparse Matrix Collection (ssget), via MATLAB and a Java GUI, to @@ -674,3 +674,13 @@ Step-by-step details: from the place they were installed. If you pass INSTALL_***= options to 'make install', you must pass the same to 'make uninstall'. +----------------------------------------------------------------------------- +Python interface +----------------------------------------------------------------------------- + +See scikit-sparse and scikit-umfpack for the Python interface via SciPy: + +https://github.com/scikit-sparse/scikit-sparse + +https://github.com/scikit-umfpack/scikit-umfpack + diff --git a/SuiteSparse_config/Makefile b/SuiteSparse_config/Makefile index 0545f6e8a1..8eb60fd673 100644 --- a/SuiteSparse_config/Makefile +++ b/SuiteSparse_config/Makefile @@ -7,7 +7,7 @@ export SUITESPARSE # version of SuiteSparse_config is also version of SuiteSparse meta-package LIBRARY = libsuitesparseconfig -VERSION = 5.11.0 +VERSION = 5.12.0 SO_VERSION = 5 default: library diff --git a/SuiteSparse_config/SuiteSparse_config.h b/SuiteSparse_config/SuiteSparse_config.h index 922add4eda..618a43fd4d 100644 --- a/SuiteSparse_config/SuiteSparse_config.h +++ b/SuiteSparse_config/SuiteSparse_config.h @@ -202,10 +202,10 @@ int SuiteSparse_version /* returns SUITESPARSE_VERSION */ */ #define SUITESPARSE_HAS_VERSION_FUNCTION -#define SUITESPARSE_DATE "Mar 14, 2022" +#define SUITESPARSE_DATE "Apr 10, 2022" #define SUITESPARSE_VER_CODE(main,sub) ((main) * 1000 + (sub)) #define SUITESPARSE_MAIN_VERSION 5 -#define SUITESPARSE_SUB_VERSION 11 +#define SUITESPARSE_SUB_VERSION 12 #define SUITESPARSE_SUBSUB_VERSION 0 #define SUITESPARSE_VERSION \ SUITESPARSE_VER_CODE(SUITESPARSE_MAIN_VERSION,SUITESPARSE_SUB_VERSION) diff --git a/SuiteSparse_config/SuiteSparse_config.mk b/SuiteSparse_config/SuiteSparse_config.mk index 9ca0a06553..a95366431f 100644 --- a/SuiteSparse_config/SuiteSparse_config.mk +++ 
b/SuiteSparse_config/SuiteSparse_config.mk @@ -7,7 +7,7 @@ # and GraphBLAS. The configuration settings for GraphBLAS are determined by # GraphBLAS/CMakeLists.txt -SUITESPARSE_VERSION = 5.11.0 +SUITESPARSE_VERSION = 5.12.0 #--------------------------------------------------------------------------- # determine what system we are on